Git browser: Generator/
This page presents code associated with the module/unit named above.
Generator/tr-static-site-generator-img.sqlite3.schema
CREATE TABLE IF NOT EXISTS images ( sha256 varchar(64) unique not null, epoch integer not null, image varchar(256) not null); CREATE UNIQUE INDEX IF NOT EXISTS fingerprint on images (sha256);
Generator/HTML/navigation.html
<div class="navigation"> <ul> <li><a href="/index.shtml">Home</a></li> <li><a href="/about.shtml">About</a></li> <li><a href="/irc.shtml">IRC</a></li> <li><a href="gemini://gemini.techrights.org/">Gemini Edition</a></li> <li><a href="/search/query">Search</a></li> <li><a href="/feed.xml">Feed</a></li> </ul> </div>
Generator/HTML/index.shtml
<!DOCTYPE html>
<html>
<head>
<!-- 1 -->
<meta name="dc.date.modified" content="2023-09-23T12:35" />
<meta name="dc.description"
content="Looking ahead, we'll probably produce more stories than before because lessening the underlying complexity lets us focus on substance" />
<title>Techrights — Welcome to the New Techrights</title>
<meta name="dc.title"
content="Welcome to Techrights" />
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
<link rel="alternate" type="application/rss+xml"
href="/feed.xml" title="Techrights" />
</head>
<body>
<a class="skip-link" href="#main">Skip to content</a>
<!--#include virtual="/header.html"-->
<!--#include virtual="/feeds.html"-->
<div class="post">
<!--#include virtual="/navigation.html"-->
<h1>Welcome to Techrights</h1>
<p>Welcome to the current iteration of <em>Techrights</em>, online since 2006 with a major infrastructural upgrade in late 2022.
Here you will find our latest posts.
In addition to HTTP/HTTPS here, <em>Techrights</em> is also available via <a href="gemini://gemini.techrights.org/">Gemini</a> and <a href="/ipfs/">IPFS</a> editions, though the IPFS service is on hiatus for the foreseeable future.
Just the other year, <a href="/upgrades">Techrights upgraded</a> from a heavy content management system to a much lighter and lower maintenance static site generator which produces both HTML for the WWW and GemText for the Gemini space.
The site is mostly prose, but there are also quite a few <a href="/videos/archive/">topical videos in the Techrights archive</a>.
A complete, chronological index of current and past articles is also available, from the <a href="/browse/latest.shtml">latest</a> to the <a href="/browse/index.shtml">oldest</a>.
</p>
<p>
Recent posts are syndicated and can be <a href="/feed.xml" title="another RSS feed with post bodies is available on request">tracked via RSS</a>.
An audio file with <a href="/morse/headlines.flac">the latest headlines in Morse</a> is updated every four hours.
</p>
<p>
Enter our self-hosted <abbr title="Internet Relay Chat (IRC), an application layer protocol that facilitates communication in the form of text">IRC</abbr> channel to <a href="/irc.shtml" title="Enter IRC channels">contact us</a> or have a chat about information communication technology and digital rights.
Or, for <abbr title="Use end-to-end encryption to ensure only the recipient of messages and accompanying material can see the contents">privacy</abbr>, take contact via <a href="http://schestowitz.com/PGP/" title="PGP Key">e-mail encrypted with OpenPGP</a>.
</p>
<blockquote>
<dl>
<dt><em>"Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away."</em></dt>
<dd> ~ Antoine de Saint-Exupery </dd>
</dl>
</blockquote>
<p>
</p>
</div>
<h1 id="main">Recent Techrights' Posts</h1>
<!--#include virtual="/latest-news.html"-->
<!--#include virtual="/footer.html"-->
</body>
</html>
Generator/HTML/rrrrrr.shtml
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>R.R.R.R.R.R.</title>
<meta name="dc.date.created" content="20220712" />
<meta name="dc.description" content="Roy and Rianne's Righteously Royalty-free RSS Reader (R.R.R.R.R.R.)" />
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/navigation.html"-->
<!--#include virtual="/feeds.html"-->
<div class="post">
<h1>R.R.R.R.R.R.</h1>
<p align="center"><a href="/i/2025/09/Righteously_Royalty_free_RSS_Reader.png"><img alt="R.R.R.R.R.R." src="/i/2025/09/Righteously_Royalty_free_RSS_Reader.png" width="200" /></a></p>
<em>Roy and Rianne's Righteously Royalty-free RSS Reader (R.R.R.R.R.R.)</em>
<p><b>What's R.R.R.R.R.R.</b>: <a href="https://techrights.org/n/2025/09/29/What_is_Roy_and_Rianne_s_Righteously_Royalty_free_RSS_Reader.shtml">A news reader that uses OPML files and parses RSS feeds</a>. Here's <a href="https://techrights.org/n/2024/06/17/Roy_and_Rianne_s_Righteously_Royalty_free_RSS_Reader_R_R_R_R_R_.shtml">how we use it</a>.</p>
<p><b>Where to get it</b>: <a href="gemini://gemini.techrights.org/git/tr-git/Links/rrrrrr.py">gemini://gemini.techrights.org/git/tr-git/Links/rrrrrr.py</a> (<a href="https://geminiprotocol.net/software/">use a Gemini client</a> if you don't have one <em>already</em>)</p>
<p><b>What is needs</b>: Python, <a href="https://sqlite.org/">SQLite</a>, and some relatively basic technical skills (no programming required)</p>
<p><b>Licence</b>: AGPLv3</p>
<p><b>Contact details</b>: <a href="/irc.shtml">IRC</a> or <a href="https://schestowitz.com/PGP/">E-mail</a> (we welcome patches)</p>
<p>
The first release was <a href="http://techrights.org/n/2024/06/24/Roy_and_Rianne_s_Righteously_Royalty_free_RSS_Reader_R_R_R_R_R_.shtml">Version 0.2</a>.
See the Gemini link above for the latest version.
</p>
</div>
<h2 class="latest">Other Recent Techrights Posts</h2>
<!--#include virtual="/latest-news.html"-->
<!--#include virtual="/footer.html"-->
</body>
</html>
Generator/HTML/irc.shtml
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>IRC and Techrights</title>
<meta name="dc.date.created" content="20220712" />
<meta name="dc.description" content="Techrights IRC channels" />
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/navigation.html"-->
<!--#include virtual="/feeds.html"-->
<div class="post">
<h1>IRC and Techrights</h1>
<p><em>Techrights</em> invites further discussion of the shared articles on Internet Relay Chat (IRC)...</p>
<p>The main IRC channel is <code>#techrights</code> at <code>irc.techrights.org</code>. To use your <em>own</em> IRC client, join channel <code>#techrights</code> in <a href="irc://irc.techrights.org"><code>irc.techrights.org</code></a>.</p>
<details>
<summary>Try the Mibbit browser-based client if your browser is encumbered by JavaScript:</summary>
<iframe width="470" height="380" scrolling="no" frameborder="0" src="https://widget.mibbit.com/?settings=7ca12664887d4b6e7a0fa6552f9e0de6&server=irc.techrights.org&autoConnect=true&channel=%23techrights" style="border:4px double gray; box-shadow: 5px 5px 5px #222;">
</iframe>
</details>
<p>Use any of the above. Again, use <u>with caution</u>. There may be privacy concerns with using the browser-based clients, so try to use your own IRC client before trying browser-based clients like Mibbit or Kiwiirc. Download an IRC client and enter the required details into it. The Internet Relay Chat (IRC) channel is <code>#techrights</code> at the IRC network <a href="irc://irc.techrights.org"><code>techrights.org</code></a>.</p>
<p>The IRC chats can be used for direct messaging as well.</p>
</div>
<h2 class="latest">Other Recent Techrights Posts</h2>
<!--#include virtual="/latest-news.html"-->
<!--#include virtual="/footer.html"-->
</body>
</html>
Generator/HTML/footer.html
<div class="footer"> <ul> <li><a href="/index.shtml">Home</a></li> <li><a href="/about.shtml">About</a></li> <li><a href="/irc.shtml">IRC</a></li> <li><a href="/search/query">Search</a></li> <li><a href="/feed.xml">Feed</a></li> </ul> </div>
Generator/HTML/sitemap.shtml
<!DOCTYPE html>
<html>
<head>
<!-- 1 -->
<meta name="dc.date.modified" content="2023-09-23T12:35" />
<meta name="dc.description"
content="Looking ahead, we'll probably produce more stories than before because lessening the underlying complexity lets us focus on substance" />
<title>Techrights — Welcome to the New Techrights' Site Map</title>
<meta name="dc.title"
content="Welcome to Techrights' Site Map" />
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
<link rel="alternate" type="application/rss+xml"
href="/feed.xml" title="Techrights" />
<style type="text/css" media="screen">
tr.odd {
/* background: #eee; */
background: #eee; /* Old browsers */
background: -moz-linear-gradient(top, #ffffff 0%, #f1f1f1 50%, #e1e1e1 51%, #f6f6f6 100%); /* FF3.6+ */
background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#ffffff), color-stop(50%,#f1f1f1), color-stop(51%,#e1e1e1), color-stop(100%,#f6f6f6)); /* Chrome,Safari4+ */
background: -webkit-linear-gradient(top, #ffffff 0%,#f1f1f1 50%,#e1e1e1 51%,#f6f6f6 100%); /* Chrome10+,Safari5.1+ */
background: -o-linear-gradient(top, #ffffff 0%,#f1f1f1 50%,#e1e1e1 51%,#f6f6f6 100%); /* Opera 11.10+ */
background: -ms-linear-gradient(top, #ffffff 0%,#f1f1f1 50%,#eaeaea 51%,#f6f6f6 100%); /* IE10+ */
background: linear-gradient(to bottom, #ffffff 0%,#f1f1f1 50%,#eaeaea 51%,#f6f6f6 100%); /* W3C */
filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#ffffff', endColorstr='#f6f6f6',GradientType=0 ); /* IE6-9 */
}
tr.even {
/* background: #ccc; */
background: #ccc; /* Old browsers */
background: -moz-linear-gradient(top, #f2f6f8 0%, #d8e1e7 50%, #b5c6d0 51%, #e0eff9 100%); /* FF3.6+ */
background: -webkit-gradient(linear, left top, left bottom, color-stop(0%,#f2f6f8), color-stop(50%,#d8e1e7), color-stop(51%,#b5c6d0), color-stop(100%,#e0eff9)); /* Chrome,Safari4+ */
background: -webkit-linear-gradient(top, #d2f6f8 0%,#d8e1e7 50%,#b5c6d0 51%,#e0eff9 100%); /* Chrome10+,Safari5.1+ */
background: -o-linear-gradient(top, #fff6f8 0%,#d8e1e7 50%,#d5d6d0 51%,#e0eff9 100%); /* Opera 11.10+ */
background: -ms-linear-gradient(top, #fff6f8 0%,#d8e1e7 50%,#d5d6d0 51%,#f0eff9 100%); /* IE10+ */
background: linear-gradient(to bottom, #fff6f8 0%,#d8e1e7 50%,#d5dedd 51%,#f0eff9 100%); /* W3C */
filter: progid:DXImageTransform.Microsoft.gradient( startColorstr='#f2f6f8', endColorstr='#f0fff9',GradientType=0 ); /* IE6-9 */
}
hr {
overflow: visible; /* For IE */
padding: 0;
border: none;
border-top: medium double #333;
color: #333;
text-align: center;
}
abbr[title]
{
border-bottom: 1px dashed #ADADAD;
text-decoration: none;
cursor: help;
}
hr:after {
content: "Techrights";
display: inline-block;
position: relative;
top: -0.7em;
font-variant: small-caps;
font-size: 0.5em;
padding: 0 0.25em;
background: white;
}
</style>
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/feeds.html"-->
<div class="post">
<!--#include virtual="/navigation.html"-->
<font size="4"><em>"Perfection is achieved, not when there is nothing more to add, but when there is nothing left to take away."</em></font>
<p align="right">
~ <font size="3">Antoine de Saint-Exupery </font>
</p>
<h1>Welcome to Techrights' Site Map</h1>
<p>Welcome to the new generation of <em>Techrights</em> (<a href="/upgrades">Techrights Has Upgraded</a>), a site founded in 2006.</p>
<table style="border-style:groove;" width="100%" cellspacing="4">
<tbody>
<tr class="odd">
<td></td>
<td>
<h4><abbr title="A distributed alternative to the Web site">IPFS</abbr> ䷉</h4>
<ul>
<li><a href="http://techrights.org/ipfs/index.html">IPFS index (HTML)</a></li>
<li><a href="http://techrights.org/ipfs/txt/txt">IPFS raw</a></li>
</ul>
<hr />
<h4><abbr title="A lightweight alternative to the World Wide Web">Gemini</abbr> ䷉</h4>
<ul>
<li><a href="gemini://gemini.techrights.org/">Techrights on Gemini</a> - <a href="http://gemini.techrights.org/">HTTP proxy</a></li>
</ul>
<hr />
<h4>Video ✇</h4>
<ul>
<li><a href="http://techrights.org/videos/archive/index.html">Techrights videos</a></li>
</ul>
<hr />
</td>
<td valign="baseline">
<h4><abbr title="Follow the site using XML-based indices">Syndication</abbr> ℜ</h4>
<a href="http://techrights.org/feed.xml">RSS feed</a> | <a href="http://techrights.org/feed/atom/">Atom</a>
<hr />
<a href="/irc.shtml" title="Enter IRC channels">Contact us</a> (<abbr title="Internet Relay Chat (IRC), an application layer protocol that facilitates communication in the form of text">IRC</abbr> chat)
<hr />
For <abbr title="Use end-to-end encryption to ensure only the recipient of messages and accompanying material can see the contents">privacy</abbr>: <a href="http://schestowitz.com/PGP/" title="PGP Key">encrypted/PGP</a>
</td>
</tr>
</tbody>
</table>
</div>
<h1>Recent Techrights' Posts</h1>
<!--#include virtual="/latest-news.html"-->
<!--#include virtual="/footer.html"-->
</body>
</html>
Generator/HTML/.directory-listing-ok
Generator/HTML/CSS/techrights.css
@charset "utf-8";
a:link {
background-image: linear-gradient(#0000ee, #0000ee);
background-size: 0% 0.1em;
background-position-y: 100%;
background-position-x: 50%;
background-repeat: no-repeat;
transition: none;
text-decoration: underline;
}
a:hover {
background-size: 100% 0.1em;
text-decoration: none;
}
a[href ^= "http"]::before {
content: "↺ ";
color: #000;
}
a[href ^= "gemini"]::before {
content: "↝ ";
color: #000;
}
a[href ^= "http"]:hover {
text-underline-offset: 0.2em;
transition: background-size 0.2s ease-in-out;
}
body {
font-family: Tahoma, Verdana, Segoe, sans-serif;
margin: 0;
background-color: #fff;
text-align: left;
width: 100%;
padding: 0;
}
details {
margin: 0 2em;
}
details[open] {
animation: animateDown 0.2s linear forwards;
}
h1.recent {
margin: 0 0.5em;
}
div.header {
padding-top: 0;
background-color: #f2f2f0;
text-align: center;
min-height: 9em;
margin-bottom: 0;
padding-bottom: 0;
}
div.header a img {
z-index: -1;
}
div.header h1 {
margin-left: 4em;
text-shadow: -0.1em 0.1em #eee, 0.1em 0.1em #444,
0.1em 0.1em #eee, -0.1em 0.1em #444;
text-transform: uppercase;
}
div.header p {
margin-left: 4em;
font-style: italic;
}
div.latest {
font-size: 90%;
border-radius: 2.5em;
background: #eee;
border: medium solid #ddd;
margin: 0.5em;
}
div.latest dl {
padding-left: 2em;
}
div.latest dt.updated:after {
content: " ☚ updated today";
font-size: 75%;
}
div.latest dl dt:hover + dd {
font-style: normal;
}
div.latest dl dt + dd:after {
content: " ";
}
div.latest dl dt:hover + dd:after {
content: " •";
}
h2.latest {
margin-left: 0.5em;
margin-right: 0.5em;
}
div.navigation {
position: relative;
text-align: center;
font-size: 85%;
margin: 0 1em 0 0;
padding: 2em 2em 0 2em;
}
div.navigation ul {
border: medium solid #000;
border-radius: 2em;
list-style: none;
}
div.navigation ul li {
display: inline;
margin: 0;
padding: 1em;
border: none;
}
div.navigation2 {
margin: 0.5em;
padding: 1em 2em 1em 0.2em;
border-radius: 2.4em;
border: medium solid #000;
background-color: #d0d0d0;
text-align: center;
font-weight: bold;
font-size: 90%;
}
div.navigation2 > ul {
list-style-type: none;
display: flex;
padding 0em 1em 0em 1em;
margin: 0;
justify-content: space-between;
}
div.navigation2 > ul > li {
display: inline;
/* top right bottom left */
margin: 0 0 0 0;
border: none;
padding: 0 1em 0 1em;
}
div.navigation2 > ul:before {
content: " ←";
text-decoration: none;
color: #000;
}
div.navigation2 > ul:after {
content: " →";
text-decoration: none;
color: #000;
}
div.navigation2 > ul > li:first-of-type {
margin: 0 1em 0 0;
padding: 0 1em 0 0;
}
div.navigation2 > ul > li:last-of-type {
margin: 0 0 0 1em;
padding: 0 0 0 1em;
}
div.navigation2 ul li a:link {
background-image: linear-gradient(#0000ee, #0000ee),
linear-gradient(#0000ee, #0000ee);
background-size: 0% 0.1em;
background-position-y: 100%;
background-position-x: 0%, 100%;
background-repeat: no-repeat;
text-decoration: underline;
}
div.navigation2 ul li a:hover {
text-decoration: none;
background-size: 100% 0.1em;
}
div.error {
border: thin solid #000;
background-image: repeating-linear-gradient(#f44, #a88 10%, #f44 100%);
padding-left: 1em;
padding-right: 1em;
box-shadow: 0.4em 0.4em 0.4em #555;
}
@keyframes slidein {
from {
margin-left: 100%;
width: 300%;
}
to {
margin-left: 0%;
width: 100%;
}
}
@keyframes animateDown {
0% {
opacity: 0;
transform: translatey(-15px);
}
100% {
opacity: 1;
transform: translatey(0);
}
}
div.error h1 {
animation-duration: 1s;
animation-name: slidein;
margin-left: 0%;
}
div.error p.notfound {
font-family: monospace;
animation-duration: 2s;
animation-name: slidein;
}
div.post {
background-image: linear-gradient(#c9cfc9, #fff 10%, #d1d8d1 80%);
padding: 0;
border: thin solid #000;
}
div.post:after {
visibility: hidden;
display: block;
font-size: 0;
content: " ";
clear: left;
height: 0;
}
div.post > h1,
div.post > h2,
div.post > h3,
div.post > h4,
div.post > h5 {
margin-left: 0.5em;
margin-right: 0.5em;
}
div.post > p {
margin-left: 1em;
margin-right: 1em;
}
div.post span.date {
box-shadow: 0.1em 0.1em 0.1em #555;
text-decoration: auto;
padding-left: 0.5em;
padding-right: 0.5em;
color: #555;
border-radius: 2.4em;
}
div.post > div ul {
margin: 0 0 0 1em;
padding: 0 0 0 1em;
list-style: none;
}
/* entries */
div.post ol > li:has(>h5),
div.post ul > li:has(>h5) {
margin: 0 0 0.2rem 0rem;
padding: 0 0 0.2rem 0.5rem;
border-radius: 0.5rem;
border: thin solid #000;
background-image: repeating-linear-gradient(#ccc, #ddd 1em, #ccd 2em);
list-style: none;
}
div.post ol > li:has(>h5) {
counter-increment: step-counter;
}
div.post ol > li:has(>h5):before {
float: left;
padding: 0.35rem 0.25rem 0.2rem 0.5rem;
font-family: Times,Georgia,serif;
font-weight: bold;
content: counter(step-counter) ".";
}
div.post ul > li:has(>h5) {
text-indent: -1rem;
padding-left: 3rem;
}
div.post blockquote {
quotes: "«" "»" "‘" "’";
font-family: serif;
text-align: left;
margin: 0 0.25rem 0.1rem 1rem;
padding: 0.2rem 0.5rem 0.5rem 1rem;
border: thin solid #888;
clear: both;
}
div.post > blockquote:before {
color: #444;
margin: 0 0.25em 0.1em 0.1em;
padding: 0 0.25em 0.2em 0.1em;
vertical-align: 1.2em;
text-shadow: 0.1em 0.1em 0.1em #555;
content: open-quote;
}
div.post blockquote:after {
color: #444;
margin: 0 0.25em 0 0.1em;
padding: 0 0.25em 0em 0.25em;
vertical-align: -1em;
text-shadow: 0.1em 0.1em 0.1em #555;
content: close-quote;
}
div.post blockquote[cite]:after {
white-space: pre-wrap;
padding: 0 0.25em 0 0.1em;
content: close-quote " \A \A \00a0 \00a0 — " attr(cite);
}
div.post li:has(>h5) blockquote:before {
padding: 0 0 0 1em;
color: #444;
vertical-align: 0em;
text-shadow: 0.1em 0.1em 0.1em #555;
content: "«";
}
div.post li:has(>h5) blockquote:after {
padding: 0 0 0 -3em;
color: #444;
vertical-align: 0em;
text-shadow: 0.1em 0.1em 0.1em #555;
content: "»";
}
div.post blockquote:empty {
display: none;
}
div.post ul, div.post ol, div.post dl {
margin: 1em 2em 2em 2em;
}
div.post ol li blockquote, div.post ul li blockquote {
margin: 0 0.25em 0.1em 0.1em;
padding: 0 0.25em 0.2em 0.1em;
border: none;
}
div.post blockquote p {
text-indent: 0;
margin: 0.25em 0.25em 0.1em 0.3em;
padding: 0.3em 0.5em 0 0.5em;
}
div.post blockquote.reprint {
padding: 0.5em;
background-color: #e8e8e8;
border-radius: 0.2rem;
}
div.post blockquote.reprint p {
border: none;
background-color: #e8e8e8;
}
div.post h1 {
width: 80%;
text-align: left;
font-size: 125%;
}
div.post p.author {
text-align: right;
font-size: 80%;
}
div.post > p.dropcap-first:first-letter {
text-shadow: #888 0.1em 0.1em 0.1em;
float: left;
font-size: 200%;
position: absolute;
line-height: 90%;
font-family: Times,Georgia,serif;
}
div.post img {
clear: both;
float: right;
padding: 0.3em 0.6em 0.3em 0.6em;
box-shadow: 0.4em 0.4em 0.4em #222;
border: medium solid #aaa;
border-radius: 2.5rem;
margin: -0.5em 1em 1em 0em;
max-width: 30%;
}
div.post img:hover {
transform: scale(1.02); /* (102% zoom - Note: if the zoom is too large, it will go outside of the viewport) */
/* opacity: 0.3; */
}
div.post pre {
font-family: monospace;
background-color: #eee;
border: thin solid #444;
margin: 0 1rem;
padding: 0.5rem 0.5rem;
/*
clip-path: polygon(
0 1rem,
1rem 0,
calc(100% - 1rem) 0,
100% 1rem,
100% calc(100% - 1rem),
calc(100% - 1rem) 100%,
1rem 100%,
0 calc(100% - 1rem)
);
*/
}
div.post a.readon {
border-radius: -1.3rem;
border: thin solid #222;
padding: 0.1em 0.25em;
margin-left: 0.2em;
background-image: radial-gradient(ellipse farthest-corner at 30% 20%,
#c6c6c6 20%, #1c1c1c 120%);
background-size: 100%;
box-shadow: 0.2em 0.2em #8f8f8f;
text-align: center;
color: #444;
text-shadow: 0.1em 0.1em #ccc;
text-decoration: none;
font-family: serif;
white-space: nowrap;
position: relative;
}
div.post a.readon:hover {
background-image: radial-gradient(ellipse farthest-corner at 30% 20%,
#767676 20%, #7c7c7c 120%);
color: #000;
}
div.post a.readon[title]:after {
content: "Via: " attr(title);
position: relative;
font-size: 95%;
font-weight: bold;
left: 120%;
color: #222;
}
div.post a > img {
margin: 0 2rem;
padding: 1rem 2.5rem;
}
div.feedlist {
position: relative;
float: right;
max-width: 20%;
font-size: 75%;
padding: 1em;
border-top: thin solid #000;
border-bottom: thin solid #000;
border-left: thin solid #000;
background-image: url("/Images/F1F1F1E9E9E9CACACAFFFFFF_108.png");
}
div.feedlist > h1,h2,h3,h4 {
margin-left: 0em;
margin-right: 0em;
}
h1, h2, h3, h4, h5, h6{
font-weight: bold;
font-family: "Liberation Serif", FreeSerif, serif;
margin: 0.3em 0.1em 0.1em 0.1em;
padding: 0.3em 0.1em 0.2em 0.1em;
}
h1 {
font-size: 200%;
}
h2 {
font-size: 150%;
}
h3 {
font-size: 125%;
}
h4 {
font-size: 115%;
}
h6 {
font-size: 110%;
padding: 1.5em;
border: thin solid #aaa;
border-radius: 1.5rem;
}
div.footer {
clear: both;
justify-content: center;
text-align: center;
margin: 0 auto 2em auto;
height: 5em;
/* background-image: linear-gradient(#c9cfc9, #fff 10%, #d1d8d1 80%); */
background: #d1d8d1;
padding: 1em;
font-size: 85%;
box-shadow: 1.5em 1.5em 1.5em #444;
}
iframe {
box-shadow: 1.5em 1.5em 1.5em #444;
float:right;
margin: 2em 2em 0.2em 2em;
}
div.bulletin {
grid-auto-flow: column dense;
border: thin solid #576707;
border-radius: 0.5rem;
gap: 3px;
}
div.bulletin dl {
grid-template-columns: repeat(5, 1fr);
column-gap: 10px;
row-gap: 1em;
grid-template-rows: auto auto;
display: grid;
}
Generator/HTML/CSS/techrights.index.css
span.date {
font-family: monospace;
}
sup {
color: #888;
}
ul.archive > li {
list-style: none;
padding: 0 0.5em 0 0.5em;
}
ul.archive > li:hover {
background-color: #ff4;
}
Generator/HTML/CSS/techrights.search.css
h1 { margin: 0 1rem; }
form { margin-left: 2rem;
margin-right: 2rem; }
table.results {
margin-left: 0.5rem;
margin-top: 0.5rem;
border-spacing: 0 0.1rem; }
table.results tr:nth-child(odd) td { background-color: hsl(0deg 0% 80%); }
table.results tr:nth-child(even) td { background-color: hsl(0deg 0% 90%); }
table.results tr td { padding-left: 0.2rem; margin: 0; }
table.results tr td:nth-child(1) {
border-top-left-radius: 0.5rem;
border-bottom-left-radius: 0.5rem; }
table.results tr td:last-child {
border-top-right-radius: 0.5rem;
border-bottom-right-radius: 0.5rem; }
Generator/HTML/CSS/.directory-listing-ok
Generator/HTML/CSS/techrights.2026.css
@charset "utf-8";
a:link {
background-image: linear-gradient(#0000ee, #0000ee);
background-size: 0% 0.1em;
background-position-y: 100%;
background-position-x: 50%;
background-repeat: no-repeat;
transition: none;
text-decoration: underline;
}
a:hover {
background-size: 100% 0.1em;
text-decoration: none;
}
a[href ^= "http"]::before {
content: "↺ ";
color: #000;
}
a[href ^= "gemini"]::before {
content: "↝ ";
color: #000;
}
a[href ^= "http"]:hover {
text-underline-offset: 0.2em;
transition: background-size 0.2s ease-in-out;
}
body {
font-family: Tahoma, Verdana, Segoe, sans-serif;
margin: 0;
padding: 0;
background-color: #fff;
color: #000;
text-align: left;
width: 100%;
}
details {
margin: 0 2em;
}
/* it seems that these animation definitions must be after the calling rules */
@keyframes slidein {
from {
margin-left: 100%;
width: 300%;
}
to {
margin-left: 0%;
width: 100%;
}
}
h1.recent {
margin: 0 0.5em;
}
div.header {
padding-top: 0;
background-color: #f2f2f0;
text-align: center;
min-height: 132px;
margin-bottom: 0;
padding-bottom: 1rem;
}
div.header a img {
z-index: -1;
}
div.header h1 {
margin-left: 4em;
text-shadow: -0.1em 0.1em #eee, 0.1em 0.1em #444,
0.1em 0.1em #eee, -0.1em 0.1em #444;
text-transform: uppercase;
}
div.header p {
margin-left: 4em;
font-style: italic;
}
div.latest {
font-size: 90%;
border-radius: 2.4rem;
background: #eee;
border: medium solid #888;
margin: 0.5em;
}
div.latest dl {
padding-left: 2em;
}
div.latest dt.updated:after {
content: " ☚ updated today";
font-size: 75%;
}
div.latest dl dt:hover + dd {
font-style: normal;
}
div.latest dl dt + dd:after {
content: " ";
}
div.latest dl dt:hover + dd:after {
content: " •";
}
h2.latest {
margin-left: 0.5em;
margin-right: 0.5em;
}
div.navigation {
position: relative;
text-align: center;
font-size: 85%;
margin: 0 1em 0 0;
padding: 2em 2em 0 2em;
}
div.footer ul,
div.navigation ul {
border: medium solid #000;
border-radius: 2rem;
list-style: none;
background-color: #e0e0e0;
}
div.footer ul li,
div.navigation ul li {
display: inline;
margin: 0;
padding: 1em;
border: none;
}
div.navigation2 {
clear: both;
margin: 0.5em;
padding: 1em 2em 1em 0.2em;
border-radius: 2.4rem;
border: medium solid #000;
background-color: #d0d0d0;
text-align: center;
font-weight: bold;
font-size: 90%;
}
div.navigation2 > ul {
list-style-type: none;
display: flex;
}
div.navigation2 > ul > li {
/* top right bottom left */
margin: 0 0 0 0;
padding: 0 1em 0 1em;
flex: 1 1 auto;
}
div.navigation2 > ul:before {
content: " ← ";
padding-right: 1em;
}
div.navigation2 > ul:after {
content: " → ";
padding-left: 1em;
}
div.navigation2 > ul > li:first-of-type {
margin: 0 1em 0 0;
padding: 0 1em 0 0;
text-align: left;
}
div.navigation2 > ul > li:last-of-type {
margin: 0 0 0 1em;
padding: 0 0 0 1em;
text-align: right;
}
div.navigation2 ul li a:link {
background-image: linear-gradient(#0000ee, #0000ee),
linear-gradient(#0000ee, #0000ee);
background-size: 0% 0.1em;
background-position-y: 100%;
background-position-x: 0%, 100%;
background-repeat: no-repeat;
}
div.footer ul li a:hover,
div.navigation ul li a:hover,
div.navigation2 ul li a:hover {
background-size: 100% 0.2em;
}
div.error {
border: thin solid #000;
background-image: repeating-linear-gradient(#f44, #a88 10%, #f44 100%);
padding-left: 1em;
padding-right: 1em;
box-shadow: 0.4em #888;
}
div.error h1 {
animation-duration: 1s;
animation-name: slidein;
margin-left: 0%;
}
div.error p.notfound {
font-family: monospace;
animation-duration: 2s;
animation-name: slidein;
}
div.post {
background-image: linear-gradient(#c9cfc9, #fff 10%, #d1d8d1 80%);
padding: 0;
border-top, border-bottom: thin solid #000;
overflow: auto; /* to contain floating images */
}
div.post:after {
visibility: hidden; /* */
display: block;
font-size: 0;
content: " ";
clear: left;
height: 0;
}
div.post > p {
margin: 0.5em 1em;
}
div.post span.date {
padding: 0 1.5em;
color: #555;
}
div.post > div ul {
margin: 0 0 0 1em;
padding: 0 0 0 1em;
list-style: none;
}
/* entries */
div.post ol > li:has(>h5),
div.post ul > li:has(>h5) {
margin: 0 2rem 0.5rem 1rem;
border-radius: 0.5rem;
border: thin solid #000;
background-image: repeating-linear-gradient(#ccc, #ddd 1em, #ccd 2em);
list-style: none;
}
div.post ol > li:has(>h5) {
counter-increment: step-counter;
}
div.post ol > li:has(>h5):before {
float: left;
padding: 0.35rem 0.25rem 0.2rem 0.5rem;
font-weight: bold;
content: counter(step-counter) ".";
font-family: "Liberation Serif", FreeSerif, serif;
}
div.post ul > li:has(>h5) {
display: block;
text-indent: -1rem;
padding-left: 3rem;
}
div.post blockquote {
quotes: "«" "»" "‘" "’";
text-align: left;
margin: 0 0.25rem 0.1rem 1rem;
padding: 0.2rem 0.5rem 0.5rem 1rem;
border: thin solid #888;
clear: both;
font-family: "Liberation Serif", FreeSerif, serif;
}
div.post > blockquote:before {
color: #444;
margin: 0 0.25em 0.1em 0.1em;
padding: 0 0.25em 0.2em 0.1em;
vertical-align: 1.2em;
text-shadow: 0.1em 0.1em 0.1em #888;
content: open-quote;
}
div.post blockquote:after {
color: #444;
margin: 0 0.25em 0 0.1em;
padding: 0 0.25em 0em 0.25em;
vertical-align: -1em;
text-shadow: 0.1em 0.1em 0.1em #888;
content: close-quote;
}
div.post blockquote[cite]:after {
white-space: pre-wrap;
padding: 0 0.25em 0 0.1em;
content: close-quote " \A \A \00a0 \00a0 — " attr(cite);
}
div.post li:has(>h5) blockquote:before {
padding: 0 0 0 1em;
color: #444;
vertical-align: 0em;
text-shadow: 0.1em 0.1em 0.1em #888;
content: "«";
}
div.post li:has(>h5) blockquote:after {
padding: 0 0 0 0;
margin-right: -3rem;
color: #444;
vertical-align: 0em;
text-shadow: 0.1em 0.1em 0.1em #888;
content: "»";
}
div.post blockquote:empty {
display: none;
}
div.post ul, div.post ol, div.post dl {
margin: 1em 2em 2em 2em;
}
div.post ol li blockquote, div.post ul li blockquote {
margin: 0 0.25em 0.1em 0.1em;
padding: 0 0.25em 0.2em 0.1em;
border: none;
}
div.post blockquote p {
text-indent: 0;
margin: 0.25em 0.25em 0.1em 0.3em;
padding: 0.3em 0.5em 0 0.5em;
}
div.post blockquote.reprint {
padding: 0.5em;
background-color: #e8e8e8;
border-radius: 0.2rem;
}
div.post blockquote.reprint p {
border: none;
background-color: #e8e8e8;
}
div.post h1 {
width: 80%;
text-align: left;
font-size: 125%;
}
div.post p.author {
text-align: right;
font-size: 80%;
}
div.post > p.dropcap-first:first-letter {
text-shadow: #888 0.1rem 0.2rem;
margin-right: 0.3rem;
initial-letter: 3 3;
font-family: "Liberation Serif", FreeSerif, serif;
}
div.post img.links {
clear: both;
float: right;
}
div.post p.side {
float: right;
}
div.post p.center {
clear: both;
border: thin solid #000;
border-collapse: collapse;
max-width: 60%;
overflow: hidden;
background-color: #bbb;
padding: 0rem;
margin-left: auto;
margin-right: auto;
}
div.post p.center a:link img {
border: thick solid #0000EE;
}
div.post p.center a:visited img {
border: thick solid #551A8B;
}
div.post p.center img {
float: none;
padding: 0 !important;
transition: all 0.5s;
display: block;
height: auto;
margin: 0.5rem auto 0.5rem auto;
max-width: 60%;
}
div.post p.center img:hover {
transform: scale(1.1);
}
div.post pre {
font-family: monospace;
background-color: #eee;
border: thin solid #444;
margin: 0 1rem;
padding: 0.5rem 0.5rem;
}
div.post a.readon {
border-radius: 1.3rem;
border: thin solid #222;
padding: 0.1em 0.25em;
margin-left: 0.2em;
background-image: radial-gradient(ellipse farthest-corner at 30% 20%,
#c6c6c6 20%, #1c1c1c 120%);
background-size: 100%;
box-shadow: 0.2em #888;
text-align: center;
color: #444;
text-shadow: 0.1em 0.1em #ccc;
text-decoration: none;
white-space: nowrap;
position: relative;
font-family: "Liberation Serif", FreeSerif, serif;
}
div.post a.readon:hover {
background-image: radial-gradient(ellipse farthest-corner at 30% 20%,
#767676 20%, #7c7c7c 120%);
color: #000;
}
div.post a.readon[title]:after {
content: "Via: " attr(title);
position: relative;
font-size: 95%;
font-weight: bold;
left: 120%;
color: #222;
}
div.feedlist {
position: relative;
float: right;
max-width: 20%;
font-size: 75%;
padding: 1em;
border-top: thin solid #000;
border-bottom: thin solid #000;
border-left: thin solid #000;
background-image: url("/Images/F1F1F1E9E9E9CACACAFFFFFF_108.png");
}
div.feedlist > h1,h2,h3,h4 {
margin-left: 0em;
margin-right: 0em;
}
h1, h2, h3, h4, h5, h6{
font-weight: bold;
font-family: "Liberation Serif", FreeSerif, serif;
margin: 0.3em 0.5em 0.1em 0.5em;
padding: 0.3em 0.1em 0.2em 0.1em;
}
h1 {
font-size: 200%;
}
h2 {
font-size: 150%;
}
h3 {
font-size: 125%;
}
h4 {
font-size: 115%;
}
h6 {
font-size: 110%;
padding: 1.5em;
border: thin solid #aaa;
border-radius: 1.3rem;
}
div.footer {
clear: both;
justify-content: center;
text-align: center;
margin: 0 auto 2em auto;
padding: 1em;
height: 5em;
font-size: 85%;
box-shadow: 1.5em #888;
}
iframe {
box-shadow: 1.5em #888;
float:right;
margin: 2em 2em 0.2em 2em;
}
Generator/HTML/CSS/techrights-old.css
body {
padding: 0.5em;
background: #f2f2f3 url(/wp-content/themes/ocadia/images/sidebar-top.gif) right top no-repeat;
}
a[href ^= "http"]::before {
content: "↺ ";
color: #844;
}
a[href ^= "https"]::before {
content: "↺ ";
color: #000;
}
a[href^="gemini:"]:after {
content: " ♊ (Gemini URI ➦)";
font-weight:bold;
font-variant: small-caps;
text-shadow: 0 0 3px #888888;
padding-right: 15px;
}
a[href^="gemini:"]:hover {
background: url(/favicon.ico) right center no-repeat;
}
a[href^="http:"] {
background: url(/images/remote.gif) right center no-repeat;
padding-right: 15px;
}
a[href^="http:"]:hover {
background: url(/images/remote_a.gif) right center no-repeat;
}
a[href^="https:"] {
background: url(/images/remote.gif) right center no-repeat;
padding-right: 15px;
}
a[href^="https:"]:hover {
background: url(/images/remote_a.gif) right center no-repeat;
}
/* ...but not to absolute links in this domain... */
a[href^="http://techrights.org"] {
background: transparent;
padding-right: 0px;
}
a[href^="http://techrights.org"]:hover {
background: transparent;
}
a[href^="https://techrights.org"] {
background: transparent;
padding-right: 0px;
}
a[href^="https://techrights.org"]:hover {
background: transparent;
}
div.oldpost::before {
content: "Archived: ";
font-family: monospace;
font-size: 175%;
}
div.oldpost {
background-color: #eaf0f6;
font-family: "Lucida Sans Unicode", Tahoma, Geneva, sans-serif;
margin-top: 1em;
padding-left: 0.5em;
padding-right: 0.3em;
padding-bottom: 0.5em;
border-top: thin solid #000;
border-bottom: thin solid #000;
border-left: thin solid #888;
border-right: thin solid #888;
border-radius: 0.2em;
}
div.oldpost > ul > li.author {
list-style: none;
font-weight: bold;
}
div.oldpost > ul > ul.date {
list-style: none;
font-size: 75%;
}
div.oldpost > ul > ul.date > li:first-child {
font-weight: bold;
}
div.oldpost > ul > ul.date > li:first-child:after {
content: ",";
}
div.oldpost > ul > ul.date > li {
display: inline;
}
div.oldpost div.navigation {
background: #eaeaea url(/wp-content/themes/ocadia/images/sidebar.gif) no-repeat top left;
text-align: center;
border-top: thin solid #000;
border-bottom: thin solid #000;
width: 95%;
}
div.comments {
border-top: thin solid #888;
}
div.comments blockquote {
background-color: #fff0db;
background: url(/wp-content/themes/ocadia/images/commentalt.gif) repeat-y;
}
div.comments ul {
border: thin solid #888;
border-radius: 0.2em;
list-style: none;
padding-left: 0.5em;
padding-right: 0.5em;
padding-bottom: 0.5em;
background-color: #eed9c4;
}
div.comments ul > li {
padding-bottom: 0.5em;
}
div.comments ul > li + li {
border-top: thin solid #800;
}
div.comments ul > li > p.author {
font-weight: bold;
float: left;
}
div.comments ul > li > p.date {
float: right;
padding-right: 2em;
font-style: italic;
}
div.comments ul > li > div {
clear: both;
}
h1 + div.latest {
clear: both;
}
div.latest {
border-bottom: thin solid #000;
}
div.navigation {
background: #fafafa;
border-top: thin solid #000;
border-bottom: thin solid #000;
border-left: thin solid #888;
border-right: thin solid #888;
border-radius: 0.2em;
text-shadow: 3px 3px 3px #ffffff;
box-shadow: 0.1em 0.1em 0.1em #555;
margin-bottom: 0.2em;
margin-top: 0.4em;
width: 72%;
}
div.footer {
clear: both;
border-top: thin solid #999;
text-align: center;
width: 20%;
height: 5em;
background: #ffffff;
border-radius: 5em;
margin-left: 65%;
margin-bottom: 2em;
margin-top: 0.3em;
padding: 1em;
font-size: 85%;
box-shadow: 1.5em 1.5em 1.5em #999;
}
div.navigation ul li,
div.footer ul li {
background: url(/wp-content/themes/ocadia/images/perma.gif) no-repeat center left;
display: inline;
/* top right bottom left */
margin: 0 0 0 -1em;
border: none;
padding: 0 1em 0 1em;
}
div.navigation > a {
font-style: italic;
}
abbr {
color: #59708C;
}
blockquote {
font-size: 90%;
border: thin solid #888;
/* background-color: #fff0db; */
padding: 0.3em;
border-radius: 1.5em;
background: #F2F2FA url(/wp-content/themes/ocadia/images/commentalt.gif) repeat-y;
}
blockquote.evidence {
margin: 0 10px;
padding: 0.05em 20px;
border-top: 2px solid #444;
border-bottom: 2px solid #444;
font-size: 1.2em;
background: #EEE url(/wp-content/themes/ocadia/images/quote-alpha.png) no-repeat;
}
blockquote:before {
content: "“";
font-weight: bold;
font-size: 110%;
}
code {
color: #666;
}
blockquote:after {
/* content: "”"; */
font-weight: bold;
font-size: 110%;
content: "” "attr(cite)" ";
}
p.dropcap-first:first-letter {
display: inline-block;
margin: -0.1em 0 0 0;
padding: 0;
vertical-align: top;
font-size: 400%;
color: #708090;
float: left;
font-family: Times, serif, Georgia;
}
.pullQuote {
margin:12px 8px 12px 0;
display:block;
width:140px;
float:left;
font-size:1.8em;
font-weight:bold;
line-height:1.2em;
color:#1E477E;
border-top: 1px solid #CCC;
border-bottom: 1px solid #CCC;
background: url(/wp-content/themes/ocadia/images/quote-alpha.png) no-repeat;
}
.columns {
-moz-column-width: 12em;
-moz-column-gap: 1em;
-moz-column-rule: medium solid;
-webkit-column-width: 12em;
-webkit-column-gap: 1em;
-webkit-column-rule: medium solid;
}
h1 {
font-size: 3.2em;
font-family: Times, serif, Georgia;
font-weight: bold;
text-shadow: 3px 3px 3px #ccc;
box-shadow: 0.1em 0.1em 0.1em #999;
font-variant: small-caps;
color: #444;
padding: 1px 10px;
background-color: #efefef;
margin: 0;
text-align: center;
width: 95%;
}
h2, h3, h4, h5, h6 {
font-size: 1.6em;
margin: 1.2em 0;
text-shadow: 3px 3px 3px #ccc;
font-family: Georgia, serif;
color: #333B38;
}
h3 {
font-size: 1.3em;
}
h4 {
font-size: 1.2em;
}
h5 {
font-size: 1.1em;
}
h6 {
font-size: 1em;
}
img {
box-shadow: 0.2em 0.2em 0.2em 0.2em #555;
border-radius: 0.4em;
}
span.date {
box-shadow: 0.1em 0.1em 0.1em #555;
text-decoration: auto;
padding-left: 0.5em;
padding-right: 0.5em;
color: #555;
border-radius: 2.4em;
}
dl > dt {
background: url(/wp-content/themes/ocadia/images/deco.gif) no-repeat bottom right;
padding-left: 0;
}
dl > dd {
padding-left: 0;
border-bottom: 1px solid #D3D3D3
}
Generator/HTML/CSS/techrights-wiki.css
body {
font: x-small sans-serif;
background: #e0e8e8 url(/wp-content/themes/ocadia/images/head.png) repeat-x;
color: black;
margin: 2;
padding: 2;
}
a[href ^= "http"]::before {
content: "↺ ";
color: #844;
}
a[href ^= "https"]::before {
content: "↺ ";
color: #000;
}
a:hover {
text-decoration: underline;
color: #000;
}
.header {
padding-top: 4em;
}
.navigation {
background: #eaeaea url(/wp-content/themes/ocadia/images/sidebar.gif) no-repeat top left;
text-align: center;
border-top: thin solid #000;
border-bottom: thin solid #000;
width: 95%;
}
div.navigation {
background: #fafafa;
border-top: thin solid #000;
border-bottom: thin solid #000;
border-left: thin solid #888;
border-right: thin solid #888;
border-radius: 0.2em;
text-shadow: 3px 3px 3px #ffffff;
box-shadow: 0.1em 0.1em 0.1em #555;
margin-bottom: 0.2em;
margin-top: 0.4em;
width: 72%;
}
.navigation ul li {
background: url(/wp-content/themes/ocadia/images/perma.gif) no-repeat center left;
display: inline;
/* top right bottom left */
margin: 0 0 0 -1em;
border: none;
padding: 0 1em 0 1em;
}
li {
padding: 0em 0em 1em 0em;
margin: 1em 1em 1em 1em;
}
div.footer ul li {
background: url(/wp-content/themes/ocadia/images/perma.gif) no-repeat center left;
display: inline;
/* top right bottom left */
margin: 0 0 0 -1em;
border: none;
padding: 0 1em 0 1em;
}
h1 {
font-size: 3.2em;
font-family: Times, serif, Georgia;
font-weight: bold;
text-shadow: 3px 3px 3px #ccc;
box-shadow: 0.1em 0.1em 0.1em #999;
font-variant: small-caps;
color: #444;
# padding-bottom: 10em;
padding: 0.2em 0.1em 0.1em 0.1em;
background-color: #efefef;
margin: 0.2em 0.2em 1em 0.3em;
text-align: center;
width: 95%;
}
h2, h3, h4, h5, h6 {
font-size: 1.6em;
margin: 1.2em 0;
text-shadow: 3px 3px 3px #ccc;
font-family: Georgia, serif;
color: #333B38;
}
h2 {
font-size: 2.4em;
}
h3 {
font-size: 2.3em;
}
h4 {
font-size: 2.2em;
}
h5 {
font-size: 2.1em;
}
h6 {
font-size: 2em;
}
div.footer {
clear: both;
border-top: thin solid #999;
text-align: center;
width: 20%;
height: 5em;
background: #ffffff;
border-radius: 5em;
margin-left: 65%;
margin-bottom: 2em;
margin-top: 0.3em;
padding: 1em;
font-size: 85%;
box-shadow: 1.5em 1.5em 1.5em #999;
}
Generator/HTML/about.shtml
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Techrights</title>
<meta name="dc.date.created" content="20220712" />
<meta name="dc.description" content="Do you waddle the waddle" />
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/navigation.html"-->
<!--#include virtual="/feeds.html"-->
<div class="post">
<h1>About Techrights</h1>
<p>The site was founded in 2006 and it focuses on Free/libre (sometimes known as Open Source) software, especially GNU/Linux.</p>
<p><b>Why it counts</b>: This site offers an independent and direct analysis of world affairs, especially in the digital realm, not seeking to appease any commercial interests in doing so.</p>
<p><b>2023 Rebirth</b>: The site tackled 17 years of technical debt by going static.</p>
</div>
<h2 class="latest">Other Recent Techrights Posts</h2>
<!--#include virtual="/latest-news.html"-->
<!--#include virtual="/footer.html"-->
</body>
</html>
Generator/tr-update-entry-sql.pl
#!/usr/bin/perl
use utf8;
use Getopt::Long;
use URI;
use DBI qw(:sql_types :utils);
use Date::Calc qw(Today_and_Now);
use File::Temp qw(tempfile);
use HTML::TreeBuilder::XPath;
use HTML::FormatText;
use HTML::Entities;
use Capture::Tiny qw(capture capture_stdout);
use Term::ANSIColor;
use Config::Tiny;
use English;
use strict;
use warnings;
if ($ENV{'USER'} eq 'root' && ! $ENV{'EUID'}) {
print STDERR qq(Cannot run as root!\nAborting\n);
exit(1);
}
my $url = "";
my $recno = 0;
my $status = 1;
my $delete = 0;
my $help = 0;
my $config = '';
our $force = 0;
our $VERBOSE = 0;
my (
$gemtext_path,
$gemtext_draft_path,
$xhtml_path,
$xhtml_draft_path,
) = ('') x 4;
GetOptions ("url=s" => \$url,
"config|c=s" => \$config,
"delete|d" => \$delete,
"force" => \$force,
"recno=i" => \$recno,
"gemini:s" => \$gemtext_path,
"draft-gemini:s" => \$gemtext_draft_path,
"xhtml:s" => \$xhtml_path,
"draft-xhtml:s" => \$xhtml_draft_path,
"help" => \$help,
"verbose+" => \$VERBOSE,
)
or die("Error in runtime options\n");
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$|);
if ( ! $config ) {
warn("Provide configuration file via the -c option.\n");
my $err = 1;
usage($script, 'sample.conf', $err);
}
if (! -f $config) {
my $err = 1;
&usage($script, $config, $err);
exit(1);
} elsif (! -r $config) {
die("Configuration file '$config' is not readable\n");
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configurationn file '$config': $!\n");
my $dbname = $configuration->{database}->{name}
or die("Database name missing from configuration file\n");
my $documentroot = $configuration->{webserver}->{documentroot}
or die("DocumentRoot missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
my $geminiroot = $configuration->{gemini}->{geminiroot}
or die("GeminiRoot missing from configuration file\n");
if ($gemtext_path) {
$gemtext_path = $gemtext_path;
} else {
$gemtext_path = $geminiroot . "/n";
}
if ($gemtext_draft_path) {
$gemtext_draft_path = $gemtext_draft_path;
} else {
$gemtext_draft_path = $geminiroot . "/drafts";
}
if ($xhtml_path) {
$xhtml_path = $xhtml_path;
} else {
$xhtml_path = $documentroot . "/n";
}
if ($xhtml_draft_path) {
$xhtml_draft_path = $xhtml_draft_path;
} else {
$xhtml_draft_path = $documentroot . "/drafts";
}
my %metadata = ();
my $body = '';
my $rawtext = '';
my $dbfile = $serverroot . "/db/" . $dbname;
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my $draft_status = '';
if (!$delete) {
# edit the metadata and body, if available
if ($recno) {
# lookup by record number
($status, $draft_status) = get_status_from_recno($dbh, $recno);
%metadata = get_metadata($dbh, $recno, $draft_status);
} elsif ($url) {
# lookup by URL instead
( $recno, $status, $draft_status ) =
get_status_from_url($dbh, $url)
or die("Record not found for '$url'\n");
if ($recno) {
%metadata = get_metadata($dbh, $recno, $draft_status);
}
} else {
# nothing was found
$dbh->rollback;
$dbh->disconnect;
my $err = 1;
&usage($script, $config, $err);
exit(0);
}
if (! %metadata) {
# if no metadata was found
$dbh->rollback;
$dbh->disconnect;
my $err = 1;
&usage($script, $config, $err);
exit(0);
}
while(1) {
# loop until ok or quit
# fetch body
$body = get_body($dbh, $recno, $draft_status);
# edit metadata and body
($body, $rawtext, %metadata) = edit_record($body, %metadata);
my $i = '';
my $new_status = 0;
while (1) {
if ($draft_status) {
print "\nBody OK? [y/N/d/q] ";
} else {
print "\nBody OK? [y/N/q] ";
}
$i = lc <>;
chomp $i;
if ($i eq '') {
$i = 'n';
}
if ($i eq 'y' or $i eq 'n') {
$new_status = 1;
last;
} elsif ($i eq 'q') {
print qq(Exiting without changes\n);
my $rc = $dbh->disconnect or warn $dbh->errstr;
exit(0);
} elsif ($draft_status and $i eq 'd') {
last;
}
}
if ($i eq 'y') {
if ($draft_status) {
$new_status = 2;
}
} elsif ($draft_status and $i eq 'd') {
print qq(Saved as draft\n);
} else {
next;
}
# try to write to database
if (write_database($dbh, $recno, $draft_status, $new_status,
$body, $rawtext, %metadata)) {
# write was successful
if ($draft_status && ! $new_status) {
print "Record $recno Modified Successfully as Draft\n";
} elsif ($draft_status && $new_status eq 2) {
print "Record $recno Modified Successfully from Draft. ";
print "Ready to publish.\n";
} else {
print "Record Modified Successfully\n";
}
my $rc = $dbh->disconnect or warn $dbh->errstr;
exit(0);
} else {
# something went wrong
print qq(Exiting. Unchanged.\n);
exit(1);
}
}
} elsif ($delete) {
# delete a record
if (!$recno && $url) {
# lookup record by URL
( $recno, $status, $draft_status ) = get_status_from_url($dbh, $url)
or die("Record not found for '$url'\n");
} elsif (!$recno) {
# neither record number nor URL were given, so quit
my $err;
&usage($script, $config, $err);
} else {
# lookup record by record number
($status, $draft_status ) = get_status_from_recno($dbh, $recno);
}
if ($VERBOSE) {
if ($draft_status) {
print qq(Deleting Draft $recno\n);
} else {
print qq(Deleting Post $recno\n);
}
}
# try to delete designated record
if (delete_record_and_file($dbh, $recno, $draft_status)) {
# success
if ($draft_status) {
print "Draft Record $recno deleted\n";
} else {
print "Record $recno deleted\n";
}
last;
} else {
# deletion failed, for whatever reason
if ($draft_status) {
print "No Draft Record deleted\n";
} else {
print "No Record deleted\n";
}
last;
}
}
my $rc = $dbh->disconnect or warn $dbh->errstr;
exit(0);
sub usage {
my ($script, $config, $error) = @_;
print <<"EOU";
USAGE
$script --config CONFIG [-dfhv] --recno n | --url url
-c, --config path to configuration file
-r, --recno the record number in the SQL database for draft or post
-u, --url the http(s) URL for the post in question
-d, --delete remove the record designated by record number or URL
-f, --force don't stop for any errors during, for deletion only
-g, --gemini override destination path for GemText
--draft-gemini override destination for GemText drafts
-x, --xhtml override destination path for XHTML
--draft-xhtml override destination for XHTML drafts
-v, --verbose show debugging info, can be increased
-h, --help show this message
Either the record number or the URL is necessary, but not both. If both are supplied, only the record number will be used. If the URL is used, it will be parse for the date and the slug and those used to figure out which record to work on.
If searching by record number, drafts will be checked first. If nothing is found among the drafts, then posts will be searched.
The -g and -x options can each be used to point to other paths and override the defaults.
Drafts are stored in a different directory. The -dg and -dx options can each be used to point to other paths and override the defaults.
These paths are needed when deleting drafts or posts because the corresponding files will be removed, too.
EOU
if ($config eq 'sample.conf') {
print "Provide a configuration file, ";
} else {
print "Looking for config file in '$config',\n";
}
print <<EOC;
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
sub get_status_from_url {
# parse a URL and retrieve the corresponding record statuses
my ($dbh, $url) = @_;
my $draft_status = 0;
my $u = URI->new($url)
or die("Bad URL: $url\n");
my $scheme = $u->scheme || '';
my $host = $u->host || '';
my $path = $u->path || '';
if ($VERBOSE) {
print "S=$scheme\n";
print "H=$host\n";
print "P=$path\n";
}
my $recno = 0;
my $query;
my $sth;
if ($path =~ m|^/drafts/|) {
# process request for a draft record
if (($recno) = ( $path =~ m|^/drafts/([0-9]+)\.shtml$| )) {
$query = qq(SELECT recno, written FROM keys
WHERE recno=?);
$sth = $dbh->prepare($query);
$sth->execute($recno);
if (my $row = $sth->fetchrow_hashref) {
$recno = $row->{'recno'};
$status = $row->{'written'};
$draft_status = 1;
$sth->finish;
}
}
} elsif ($path =~ m|^/\w+/|) {
# process request for a regular, published record
my $keydate;
my ($year, $month, $day, $slug, $ballast);
if ( ($year, $month, $day, $slug, $ballast) =
( $path =~ m|^/n/([0-9]{4})/([0-9]{2})/([0-9]{2})/
(.*)\.([0-9]+)\.shtml$|x ) ) {
$keydate = $year.$month.$day;
$query = qq(SELECT recno, written FROM keys
WHERE date=?
AND slug=? AND ballast=?);
$sth = $dbh->prepare($query);
$sth->execute($keydate, $slug, $ballast);
} elsif ( ($year, $month, $day, $slug) =
( $path =~ m|^/n/([0-9]{4})/([0-9]{2})/([0-9]{2})/
(.* )\.shtml$|x ) ) {
$keydate = $year.$month.$day;
$query = qq(SELECT recno, written FROM keys
WHERE date=?
AND slug=?);
$sth = $dbh->prepare($query);
$sth->execute($keydate, $slug);
} else {
print qq(Missing valid path in '$url'\n);
$dbh->disconnect();
exit(1);
}
if (my $row = $sth->fetchrow_hashref) {
$recno = $row->{'recno'};
$status = $row->{'written'};
} else {
print qq(No record found associated with URL '$path'\n);
$dbh->disconnect();
exit(1);
}
$sth->finish;
} else {
# error -- nothing to work from
print qq(Missing path from '$url'\n);
exit(1);
}
return($recno, $status, $draft_status);
}
sub get_status_from_recno {
# use a record number to retrieve the corresponding record
my ($dbh, $recno) = @_;
# check drafts for that recno first
my $query = qq(SELECT written FROM draft_keys WHERE recno=$recno);
my $sth = $dbh->prepare($query);
$sth->execute();
my $draft_status = 0;
if (my $row = $sth->fetchrow_hashref) {
# a draft record was found
$status = $row->{'written'};
$draft_status = 1;
print qq(Draft $recno found\n);
} else {
# check regular posts for that record number, if there was no draft
$query = qq(SELECT written FROM keys WHERE recno=$recno);
$sth = $dbh->prepare($query);
$sth->execute();
if (my $row = $sth->fetchrow_hashref) {
# a regular post has been found
$status = $row->{'written'};
print qq(Post $recno found\n);
} else {
# failed to find anything
print qq(Record $recno not found in either drafts or posts\n);
$dbh->disconnect;
exit(1);
}
}
$sth->finish;
return($status, $draft_status);
}
sub get_metadata {
# fetch metadata for the designated regular or draft record
my ($dbh, $recno, $draft_status) = @_;
my %metadata = ();
# form a query for regular or draft record
my $query;
if ($draft_status) {
$query = qq(SELECT * FROM draft_metadata WHERE recno=?);
} else {
$query = qq(SELECT * FROM metadata WHERE recno=?);
}
# fetch the record's metadata
my $sth = $dbh->prepare($query);
$sth->execute($recno);
# save the retrieved metadata, if any, in a hash
while (my $row = $sth->fetchrow_hashref) {
my $term = $row->{'term'};
my $value = $row->{'value'};
push(@{$metadata{$term}}, $value);
}
$sth->finish;
return(%metadata);
}
sub get_body {
# fetch body for the designated regular or draft record
my ($dbh, $recno, $draft_status) = @_;
# form a query for regular or drafrt record
my $query;
if ($draft_status) {
$query = qq(SELECT body FROM draft_body WHERE recno=?);
} else {
$query = qq(SELECT body FROM body WHERE recno=?);
}
# fetch the record's body
my $sth = $dbh->prepare($query);
$sth->execute($recno);
# save the retrieved body, if any, as a string
my $row = $sth->fetchrow_hashref;
my $body = $row->{'body'} || '';
$sth->finish;
return($body);
}
sub edit_record {
# edit and return metadata and body
my ($body, %metadata) = @_;
my $done = 0;
my $title = '';
my $description = '';
# edit metadata in a loop until done or quit the program
while (!$done) {
for my $k (sort keys %metadata) {
if ($k =~ m/^dc\.date\.created/) {
print "$k [",join(';', @{$metadata{$k}}),"] \n";
} elsif ($k =~ m/^dc\.date\.modified/) {
my ($year,$month,$day, $hour,$min,$sec) = Today_and_Now(1);
my $date = sprintf("%04d-%02d-%02dT%02d:%02d",
$year,$month,$day,$hour,$min);
@{$metadata{$k}}[0]= $date;
print "$k [",join(';', @{$metadata{$k}}),"] \n";
} else {
if ($k =~ m/^dc\.description/) {
$description = join(';', @{$metadata{$k}});
} elsif ($k =~ m/^dc\.title/) {
$title = join(';', @{$metadata{$k}});
}
print "$k [",join(';', @{$metadata{$k}}),"] ";
my $v = <>;
chomp($v);
$v =~ tr/\x00-\x08\x0a-\x1f//ds;
$v =~ tr/\x09/ /s;
if ($v) {
# 0x3B is a semicolon
@{$metadata{$k}} = split(/\{x3b}/, $v);
}
}
}
print "\nMetadata OK? [y/N/q] ";
my $i = lc <>;
chomp $i;
if ($i eq 'y') {
$done = 1;
} elsif ($i eq 'q') {
$dbh->disconnect;
exit(0);
} else {
next;
}
}
# use a temp file to get the XHTML over to the next script
my $editor = File::Temp->new( TEMPLATE => 'temp.XXXXX',
DIR => '/tmp',
SUFFIX => '.tm.body1.tmp',
UNLINK => 1 );
my $validator = File::Temp->new( TEMPLATE => 'temp.XXXXX',
DIR => '/tmp',
SUFFIX => '.tm.body2.tmp',
UNLINK => 1 );
my $tmpfile = $editor->filename;
-f $tmpfile && unlink($tmpfile); # clear the way for nano
my $vfile = $validator->filename;
-f $vfile && unlink($vfile); # clear the way for nano
open (my $tf, ">", $tmpfile)
or die("Could not open '$tmpfile' for writing: $!\n");
print $tf $body;
close($tf);
# edit body in a loop until done or quit the program
my @cmd = ();
$done = 0;
while (!$done) {
# launch nano to edit body in a temporary file
@cmd = ('/usr/bin/nano', '--tabstospaces', $tmpfile);
system(@cmd) == 0
or die("editing '@cmd' failed: $?\n");
# read first temporart file into a string for processing
open(my $tf, "<", $tmpfile)
or die("Could not open '$tmpfile' for reading\n");
my $lines = "";
# escape ampersands, if needed
while (my $line = <$tf>) {
$line =~ s| \& | \& |gm;
$lines .= $line;
}
close ($tf);
# add paragraphs if there is no other XHTML markup
if ($lines =~ m/^(?!<[^>]+>).*$/m) {
$lines =~ s|^|<p>|;
$lines =~ s|\n\n+|</p>\n<p>\n|gm;
}
# save string to temporary file for conversion to HTML and validation
open(my $ov, ">", $vfile)
or die("Could not copy to '$vfile'\n");
print $ov $lines;
close ($ov);
# force conversion of $vfile to XHTML using tidy
@cmd = ('/usr/bin/tidy', '-m', '-q', '--show-info', 'no',
'--output-xml',
'--preserve-entities', 'yes', '-utf8', '-asxml', $vfile);
my ($stdout, $stderr, $result) = capture { system(@cmd) };
# validate $vfile now that it has become XHTML
@cmd = ('/usr/bin/tidy', '-q', '--show-info', 'no',
'--output-xml',
'--preserve-entities', 'yes', '-utf8', '-xml', $vfile);
($stdout, $result) = capture_stdout { system(@cmd) };
if ($result) {
# tidy failed, loop back to start and edit again
print STDERR "HTML validation failed\n";
print STDERR "press RETURN to continue editing";
my $i = <>;
next;
} else {
print "HTML validation succeeded\n";
}
# parse XHTML file to check for faux pas
my $xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->implicit_tags(1);
$xhtml->no_expand_entities(1);
$xhtml->ignore_ignorable_whitespace(0);
$xhtml->no_space_compacting(1);
$xhtml->parse_file($vfile)
or die("Could not parse '$vfile' : $!\n");
my $error = 0;
# look for hotlinked images, report error if they are found
for my $hotlink ($xhtml->findnodes('//img[starts-with(@src,"http")]')) {
$error++;
}
if ($error) {
# a hotlinked image was found, try editing again
print STDERR "Failure: image hotlinking present. ";
print STDERR "Remove it to proceed.\n";
print STDERR "press RETURN";
my $i = <>;
next;
} else {
$done++;
}
$error = 0;
# require ALT text for IMG elements
for my $alt ($xhtml->findnodes('//img[not(@alt) or @alt[not(string())]]')) {
$error++;
}
if ($error) {
# the ALT text was missing, try editing again
print color('bold white');
print STDERR "Failure: missing or empty ALT attribute in IMG.";
print STDERR " Add it to proceed.\n";
print STDERR "press RETURN";
print color('reset');
my $i = <>;
$done = 0;
next;
} else {
$done++;
}
# blockquotes
for my $blockquote ($xhtml->findnodes('//blockquote')) {
my $new_blockquote = HTML::Element->new('blockquote');
# check all nodes within blockquotes, convert text to paragraphs
for my $node ($blockquote->content_list) {
# iterate through nodes under the blockquote
if (! ref($node)) {
# it is not an HTML element
# skip it is a blank line
if ($node =~ m/^\s+$/ms ) {
next;
}
my $p = HTML::Element->new('p');
$p->push_content($node);
$node = $p;
}
$new_blockquote->push_content($node);
}
$blockquote->replace_with($new_blockquote);
}
# find and replace absolute links to Techrights domain
my $absolute = 0;
for my $href ($xhtml->findnodes('//a[@href]')) {
if($href->attr('href') =~ m|^https?:/*[^/]*techrights.org/|) {
my $h = $href->attr('href');
$h =~ s|^https?:/*[^/]*techrights.org/|/|;
$href->attr('href', $h);
$absolute++;
}
}
for my $img ($xhtml->findnodes('//img[@src]')) {
if($img->attr('src') =~ m|^https?:/*[^/]*techrights.org/|) {
my $s = $img->attr('src');
$s =~ s|^https?:/*[^/]*techrights.org/|/|;
$img->attr('src', $s);
$absolute++;
}
}
if ($absolute) {
print STDERR $absolute;
print STDERR qq( TR reference), $absolute == 1 ? '' : 's';
print STDERR qq( converted to relative\n);
}
$body =~ s/^\s+$//m;
$body =~ s/\n+$//ms;
$xhtml->delete;
}
close($editor);
close($validator);
# turn 'hair space' into a normal space
$body =~ s/\x{200a}/ /gms;
# klude to deal with body element
$body =~ s|^<body>\s*||m;
$body =~ s|^</body>||m;
# strip elements and white space
my $rawtext = get_raw_text($body, $title . ' ' . $description);
return($body, $rawtext, %metadata);
}
sub get_raw_text {
# strip markup from text
my ($body, $metadata) = @_;
$body = decode_entities($body);
# pad out all elements so that there is still space when they are collapsed
$body =~ s/>/> /gms;
my $xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->implicit_tags(1);
$xhtml->parse($body)
or die("Could not parse rawtext : $!\n");
my $rawtext = decode_entities($metadata) . ' '
. join(' ', map($_->as_trimmed_text, $xhtml->findnodes('//body')));
return($rawtext);
}
sub write_database {
my ($dbh, $recno, $draft_status, $new_status,
$body, $rawtext, %metadata) = @_;
my $query = "";
# clear original metadata
my $sth;
if ($draft_status) {
$sth = $dbh->prepare('DELETE FROM draft_metadata WHERE recno=?')
or die("Could not prepare deletion\n");
eval {
$sth->execute($recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
exit(1); # error
}
} else {
$sth = $dbh->prepare('DELETE FROM metadata WHERE recno=?')
or die("Could not prepare deletion\n");
eval {
$sth->execute($recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
exit(1); # error
}
}
# place new metadata
if ($draft_status) {
$sth = $dbh->prepare('INSERT INTO draft_metadata (recno, term, value)
VALUES (?, ?, ?)');
} else {
$sth = $dbh->prepare('INSERT INTO metadata (recno, term, value)
VALUES (?, ?, ?)');
}
for my $k (sort keys %metadata) {
for my $v (@{$metadata{$k}}) {
eval {
$sth->execute($recno, $k, $v);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not reinsert metadata: $!\n");
}
}
}
# update body text
if ($draft_status) {
$sth = $dbh->prepare('UPDATE draft_body SET body=? WHERE recno=?');
} else {
$sth = $dbh->prepare('UPDATE body SET body=? WHERE recno=?');
}
eval {
$sth->execute($body, $recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
exit(1); # error
}
if ($draft_status) {
$sth = $dbh->prepare('REPLACE INTO draft_rawtext
(recno, fulltext) VALUES (?,?)');
} else {
$sth = $dbh->prepare('REPLACE INTO rawtext_body
(recno, fulltext) VALUES (?,?)');
}
eval {
$sth->execute($recno, $rawtext);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not update rawtext table\n");
}
if (! $draft_status) {
$sth = $dbh->prepare('REPLACE INTO rawtext_metadata
(recno, fulltext) VALUES (?,?)');
$rawtext = join(' ', @{$metadata{'dc.title'}},
@{$metadata{'dc.description'}});
eval {
$sth->execute($recno, $rawtext);
};
if($@) {
$dbh->rollback;
die("Could not update rawtext table\n");
}
$sth->finish;
}
# mark record as being unwritten or a draft
if ($draft_status) {
if ($new_status) {
$sth =
$dbh->prepare('UPDATE draft_keys SET written=2 WHERE recno=?');
} else {
$sth =
$dbh->prepare('UPDATE draft_keys SET written=0 WHERE recno=?');
}
} else {
$sth = $dbh->prepare('UPDATE keys SET written=0 WHERE recno=?');
}
eval {
$sth->execute($recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
exit(1); # error
}
$sth->finish;
$dbh->commit;
return(1);
}
sub delete_record_and_file {
my ($dbh, $recno, $draft_status) = @_;
# need to delete file now first
my $query;
my $sth;
if ($draft_status) {
$query = qq(SELECT * FROM draft_keys WHERE recno=?);
} else {
$query = qq(SELECT * FROM keys WHERE recno=?);
}
$sth = $dbh->prepare($query);
eval {
$sth->execute($recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not DELETE '$query'\n");
}
my $file = '';
if ($draft_status) {
while (my $data = $sth->fetchrow_hashref()) {
my $recno = $data->{'recno'};
$file = qq($xhtml_draft_path/$recno.shtml);
if (-f $file && unlink $file) {
print qq($file deleted\n);
} else {
warn("$file NOT deleted\n");
}
$file = qq($gemtext_draft_path/$recno.shtml);
if (-f $file && unlink $file) {
print qq($file deleted\n);
} else {
warn("$file NOT deleted\n");
}
}
} else {
while (my $data = $sth->fetchrow_hashref()) {
my $slug = $data->{'slug'};
my $date = $data->{'date'};
my $ballast = $data->{'ballast'};
$date =~ s(^([0-9]{4})([0-9]{2})([0-9]{2})$)
($1/$2/$3)x;
if ($ballast) {
$file = qq($xhtml_path/$date/$slug.$ballast.shtml);
} else {
$file = qq($xhtml_path/$date/$slug.shtml);
}
if (-f $file && unlink $file) {
print qq($file deleted\n);
} else {
warn("$file NOT deleted\n");
}
if ($ballast) {
$file = qq($gemtext_path/n/$date/$slug.$ballast.gmi);
} else {
$file = qq($gemtext_path/n/$date/$slug.gmi);
}
if (-f $file && unlink $file) {
print qq($file deleted\n);
} else {
warn("$file NOT deleted\n");
}
}
}
# delete record from database, either post or draft
my @queries = ();
if ($draft_status) {
@queries = (
qq(DELETE FROM draft_keys WHERE recno=?),
qq(DELETE FROM draft_metadata WHERE recno=?),
qq(DELETE FROM draft_body WHERE recno=?),
);
} else {
@queries = (
qq(DELETE FROM keys WHERE recno=?),
qq(DELETE FROM metadata WHERE recno=?),
qq(DELETE FROM body WHERE recno=?),
qq(DELETE FROM rawtext_metadata WHERE recno=?),
qq(DELETE FROM rawtext_body WHERE recno=?),
);
}
my $success = 0;
for my $query (@queries) {
if ($VERBOSE > 1) {
print qq(DEL '$query'\n);
}
$sth = $dbh->prepare($query);
eval {
my $rc = $sth->execute($recno);
$success = $success + $rc;
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not DELETE '$query'\n");
}
}
$sth->finish;
$dbh->commit or die("Could not delete.\n");
if ($success) {
return(1);
} else {
return(0);
}
}
sub iso_8601_date {
my ($date) = @_;
if ($date =~ s/^([0-9]{4})([0-9]{2})([0-9]{2})$/$1-$2-$3T00:00/) {
1;
} elsif ($date =~ s/^([0-9]{4})-([0-9]{2})-([0-9]{2})$/$1-$2-$3T00:00/) {
1;
} elsif ($date =~ m/^[0-9]{4}[0-9]{2}[0-9]{2}T[0-9]{2}:[0-9]{2}$/) {
1;
} else {
$date = 0;
}
return($date);
}
Generator/tr-generate-feed.pl
#!/usr/bin/perl
use Getopt::Long;
use Date::Calc qw/check_date Today_and_Now Delta_DHMS/;
use DBI qw(:sql_types);
use XML::RSS; # RSS for HTML
use XML::Feed; # Atom for GemText
use URI::Escape;
use DateTime;
use Encode;
use HTML::Entities qw(encode_entities_numeric decode_entities);
use Capture::Tiny qw(capture_stderr);
use Config::Tiny;
use English;
use warnings;
use strict;
if ($ENV{'USER'} eq 'root' && ! $ENV{'EUID'}) {
print STDERR qq(Cannot run as root!\nAborting\n);
exit(1);
}
our %opt;
our ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$| );
our $VERBOSE = 0;
GetOptions ("xml|a" => \$opt{'a'},
"body|b" => \$opt{'b'},
"config|c=s" => \$opt{'c'},
"date|d=s" => \$opt{'d'},
"gemini" => \$opt{'g'},
"number=i" => \$opt{'n'},
"output=s" => \$opt{'o'},
"xhtml|x" => \$opt{'x'},
"update|u" => \$opt{'u'},
"verbose+" => \$opt{'v'},
"help" => \$opt{'h'},
);
my $config = $opt{'c'};
if ( ! $opt{'c'} ) {
my $err = 1;
&usage($script, 'sample.conf', $err);
}
if ($opt{'h'}) {
my $err = 0;
&usage($script, $config, $err);
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configuration file '$config': $!\n");
my $dbname = $configuration->{database}->{name}
or die("Database name missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
my $dbfile = $serverroot . '/db/' . $dbname;
if ($opt{'v'}) {
$VERBOSE = $opt{'v'};
}
my %metadata; # merged
my %metadata_date; # by date only
my %metadata_number; # last n records only
# get posts on or since the date provided
if ($opt{'d'}) {
my ($year, $month, $day) = get_date($opt{'d'});
%metadata_date = &fetch_metadata_date($dbfile,$year,$month,$day);
print "$year, $month, $day\n" if ($VERBOSE);
}
# get the latest N posts from the database
if($opt{'n'}) {
# force conversion to number
my $nth = $opt{'n'} + 0;
if (!$nth) {
warn("An integer is missing. One is needed when -n is used.");
exit(1);
}
%metadata_number = &fetch_metadata_nth($nth);
}
if (!$opt{'d'} && !$opt{'n'}) {
warn("Either a date -d or a quantity -n needs to be supplied.\n");
exit(1);
}
# create union of by-date and latest Nth posts by running through both
while ((my $k, my $v) = each(%metadata_date)) {
$metadata{$k} = $v;
}
while ((my $k, my $v) = each(%metadata_number)) {
$metadata{$k} = $v;
}
my $feed;
if (defined($opt{'a'})) {
my $bodies;
if (defined($opt{'b'})) {
$bodies = &fetch_bodies(sort keys %metadata);
}
if ($opt{'x'}) {
$feed = &make_http_rss_feed(\%metadata, \$bodies);
} elsif ($opt{'g'}) {
$feed = &make_gemini_atom_feed(%metadata);
} else {
die("An option -g or -x must be provided\n");
}
} else {
if ($opt{'x'}) {
$feed = &make_xhtml_feed(%metadata);
} elsif ($opt{'g'}) {
$feed = &make_gemtext_feed(%metadata);
} else {
die("An option -g or -x must be provided\n");
}
}
# try to capture warnings sent to STDERR about "wide characters" here
my ($stderr, $result) = capture_stderr { print $feed };
exit(0);
# explain options and usage, then exit
sub usage {
my ($script, $config, $error) = @_;
print "USAGE\n\n";
print "$script [options]\n\n";
print "Extract last n records and/or starting with the specified date and";
print " form either an native list or an Atom feed. Default is a native";
print " list.\n\n";
print " -a, --xml produce an XML-based RSS 2.0 feed for XHTML\n";
print " and produce an Atom feed for GemText\n";
print " -b, --body include post body in feed\n";
print " -c, --config path to configuration file\n";
print " -d, --date YYYYMMDD format, defaults to today if missing\n";
print " -f, --force force overwrite of pre-existing destination files\n";
print " -g, --gemtext make the either the gemtext list or Atom\n";
print " feed use Gemini URLs\n";
print " -n, --number take the last n records, instead of date\n";
print " -x, --xhtml make the either the definition list or Atom\n";
print " feed use HTTP(S) URLs\n";
print " -u, --update annotate recently updated items, default is off\n";
print " -v, --verbose show debugging info\n";
print "\n";
print " -h, --help show this message\n";
print "\n";
print "Either -d or -n must be supplied, or both. If both are supplied";
print " then the result is the union of both sets.\n\n";
print "Example: \n";
print " $script -v -d 20220711 -s\n";
print "\n";
print "Example: \n";
print " $script -n 10\n";
if ($config eq 'sample.conf') {
print "\nProvide a configuration file, ";
} else {
print "\nLooking for config file in '$config',\n";
}
print <<"EOC";
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
# validate and return date from option XOR return current date
sub get_date {
my ($date) = @_;
my ($year, $month, $day);
if ($date) {
($date) = ($opt{'d'} =~ m/^([0-9]{4}-[0-9]{2}-[0-9]{2})$/)
or
($date) = ($opt{'d'} =~ m/^([0-9]{4}[0-9]{2}[0-9]{2})$/);
$date =~ s/-//g;
if (!$date) {
print STDERR qq(Invalid date '), $opt{'d'}, qq('\n);
exit(1);
}
($year,$month,$day) =
($date =~ m/^([0-9]{4})([0-9]{2})([0-9]{2})$/);
if (! check_date($year,$month,$day)) {
print STDERR qq(Invalid date '), $opt{'d'}, qq('\n);
exit(1);
}
}
if (!$date) {
($year,$month,$day) = Today_and_Now(1); # get date GMT
$year = sprintf("%04d", $year);
$month = sprintf("%02d", $month);
$day = sprintf("%02d", $day);
}
return($year, $month, $day);
}
# fetch the posts made on or since YYYY MM DD
sub fetch_metadata_date{
my ($dbfile, $year,$month,$day) = @_;
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my %metadata;
my $sth;
my $recno;
# get the next record number, noting which records have been updated
# the CASE clause might be unnecessary as a more complex sorting
# calculation is made in the perl code
my $query = qq(SELECT keys.recno AS recno, value, updated,
keys.ballast AS ballast, keys.slug AS slug
FROM keys
INNER JOIN (
SELECT created.recno, modified.value,
CASE
WHEN created.value<modified.value THEN 1
ELSE 0
END updated
FROM metadata created, metadata modified
WHERE modified.term="dc.date.modified"
AND substr(modified.value,1,10)>=?
AND created.term="dc.date.created"
AND created.recno=modified.recno) AS t3
ON t3.recno == keys.recno
WHERE keys.written=1
ORDER BY t3.value DESC, recno DESC);
$sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
my $date = "$year-$month-$day";
print "Date $date\n" if ($VERBOSE);
$sth->execute($date)
or die("execute statement failed: $dbh->errstr()\n");
# Read the matching records and print them out
while (my $data = $sth->fetchrow_hashref) {
my $recno = $data->{'recno'};
my $ballast = $data->{'ballast'};
my $title = '';
my $author = '';
my $description = '';
if ($opt{'u'}) {
$metadata{$recno}{'updated'} = $data->{'updated'};
} else {
$metadata{$recno}{'updated'} = 0;
}
if ($ballast) {
my $slug = uri_escape($data->{'slug'});
$metadata{$recno}{'url'} = $slug.'.'.$ballast;
} else {
my $slug = uri_escape( $data->{'slug'});
$metadata{$recno}{'url'} = $slug;
}
$metadata{$recno}{'updated'} = $data->{'updated'};
$query = qq(SELECT term,value FROM metadata WHERE recno=?);
my $sth2 = $dbh->prepare($query);
$sth2->execute($recno)
or die("execute statement failed: $dbh->errstr()\n");
my $date_created = '';
while (my $record = $sth2->fetchrow_hashref) {
my $term = $record->{'term'};
my $value = $record->{'value'};
if ($term eq 'dc.date.created') {
$date_created = $value;
$metadata{$recno}{'date.created'} = $value;
} elsif ($term eq 'dc.date.modified') {
$metadata{$recno}{'date.modified'} = $value;
} elsif ($term eq 'dc.description') {
$metadata{$recno}{'description'} = $value;
} elsif ($term eq 'dc.title') {
$metadata{$recno}{'title'} = $value;
}
}
if ($VERBOSE > 1) {
print "DC=$date_created\n";
print "DC=",$metadata{$recno}{'date.created'},"\n";
print "DM=",$metadata{$recno}{'date.modified'},"\n";
}
if (defined($metadata{$recno}{'url'})
&& $date_created) {
my $path = $date_created;
$path =~ s|^([0-9]{4})-([0-9]{2})-([0-9]{2})T.*$|$1/$2/$3|
or die("Could not validate '$path'\n");
$path = '/n/'.$path;
my $url = $path.'/'.$metadata{$recno}{'url'}.'.shtml';
$url =~ s|(?<!:)//|/|g;
$url = uri_escape($url, "?'\"");
$metadata{$recno}{'url'} = $url;
}
}
$sth->finish;
$dbh->disconnect;
return(%metadata);
}
# fetch the N most recent posts from the database
sub fetch_metadata_nth{
my ($nth) = @_;
my $dbfile="/var/www/techrights.org/db/tr-static-site-generator.sqlite3";
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my %metadata;
my $sth;
# get the next record number, noting which records have been updated
# the CASE clause might be unnecessary as a more complex sorting
# calculation is made in the perl code
my $query = qq(SELECT keys.recno AS recno, value, updated,
keys.ballast AS ballast, keys.slug AS slug
FROM keys
INNER JOIN (
SELECT created.recno, modified.value,
CASE
WHEN created.value<modified.value THEN 1
ELSE 0
END updated
FROM metadata created, metadata modified
WHERE created.recno=modified.recno
AND created.term="dc.date.created"
AND modified.term="dc.date.modified") AS t3
ON t3.recno == keys.recno
WHERE keys.written=1
ORDER BY t3.value DESC, recno DESC
LIMIT ?);
$sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute($nth)
or die("execute statement failed: $dbh->errstr()\n");
# Read the matching records and print them out
while (my $data = $sth->fetchrow_hashref) {
my $recno = $data->{'recno'};
my $ballast = $data->{'ballast'};
my $title = '';
my $author = '';
my $description = '';
if ($opt{'u'}) {
$metadata{$recno}{'updated'} = $data->{'updated'};
} else {
$metadata{$recno}{'updated'} = 0;
}
if ($ballast) {
my $slug = uri_escape($data->{'slug'});
$metadata{$recno}{'url'} = $slug.'.'.$ballast;
} else {
my $slug = uri_escape($data->{'slug'});
$metadata{$recno}{'url'} = $slug;
}
print "URL2 = ".$metadata{$recno}{'url'}."\n" if ($VERBOSE);
$query = qq(SELECT term,value FROM metadata WHERE recno=?);
my $sth2 = $dbh->prepare($query);
$sth2->execute($recno)
or die("execute statement failed: $dbh->errstr()\n");
my $date_created = '';
while (my $record = $sth2->fetchrow_hashref) {
my $term = $record->{'term'};
my $value = $record->{'value'};
if ($term eq 'dc.date.created') {
$date_created = $value;
$metadata{$recno}{'date.created'} = $value;
} elsif ($term eq 'dc.date.modified') {
$metadata{$recno}{'date.modified'} = $value;
} elsif ($term eq 'dc.description') {
$metadata{$recno}{'description'} = $value;
} elsif ($term eq 'dc.title') {
$metadata{$recno}{'title'} = $value;
} elsif ($term eq 'dc.creator') {
$metadata{$recno}{'author'} = $value;
}
}
if ($VERBOSE > 1) {
print "DC=$date_created\n";
}
if (defined($metadata{$recno}{'url'})
&& $date_created ) {
my $path = $date_created;
$path =~ s|^([0-9]{4})-([0-9]{2})-([0-9]{2})T.*$|$1/$2/$3|
or die("Could not validate '$path'\n");
$path = '/n/'.$path;
my $url = $path.'/'.$metadata{$recno}{'url'}.'.shtml';
$url =~ s|(?<!:)//|/|g;
$url = uri_escape($url, "?'\"");
$metadata{$recno}{'url'} = $url;
}
}
$sth->finish;
$dbh->disconnect;
return(%metadata);
}
sub fetch_bodies {
my (@recnos) = @_;
my $sth;
my $dbfile="/var/www/techrights.org/db/tr-static-site-generator.sqlite3";
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
# SELECT recno FROM body WHERE recno IN (2284, 2285, 2286);
my $query = sprintf('SELECT recno, body FROM body WHERE recno IN (%s)',
join ',', ('?') x @recnos);
$sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute( (@recnos) )
or die("execute statement failed: $dbh->errstr()\n");
my $bodies = $sth->fetchall_hashref('recno');
$sth->finish;
$dbh->disconnect;
return( $bodies );
}
sub make_http_rss_feed {
my ($protofeed, $bodies) = @_;
# make xml/rss feed for use over HTTP / HTTPS
my $http = "https://techrights.org"; # hardcoded :(
# see https://validator.w3.org/feed/docs/error/InvalidRFC2822Date.html
my $dt = DateTime->now(time_zone=>'UTC');
my $d = $dt->strftime('%a, %d %b %Y %H:%M:%S %z');
# create an RSS 2.0 feed in UTF-8, without encoding non-ASCII entities
my $feed = XML::RSS->new(encoding=>'UTF-8',
output => "2.0",
encode_output => 0);
# chanel metadata
$feed->channel(title=>'Techrights',
link=>'https://techrights.org/',
pubDate=>$d,
description => 'bonum certa men certa',
language=>'en',
publisher=>'techrights.org',
ttl => "300",
);
# add entries for each individual post in this feed
# sorted in a special sequence, floating recently updated posts to the top
for my $recno (sort {
&by_updated($$protofeed{$b}{'date.created'},
$$protofeed{$b}{'date.modified'},
$$protofeed{$a}{'date.created'},
$$protofeed{$a}{'date.modified'})
or $$protofeed{$b}{'date.modified'}
cmp $$protofeed{$a}{'date.modified'}
or $$protofeed{$b}{'date.created'}
cmp $$protofeed{$a}{'date.created'}
or $b cmp $a
} keys %{$protofeed} ) {
# default to now, unless replaced with dc.date.modified
my $pubDate = $dt;
if ( my ($y, $m, $d, $H, $M) =
($$protofeed{$recno}{'date.modified'}
=~ m/^([0-9]{4})-([0-9]{2})-([0-9]{2})
T([0-9]{2}):([0-9]{2})/x)) {
$pubDate = DateTime->new(
year => $y,
month => $m,
day => $d,
hour => $H,
minute => $M,
time_zone => "UTC",
);
$pubDate = $pubDate->strftime('%a, %d %b %Y %H:%M:%S %z');
}
if (defined($$protofeed{$recno}{'url'})) {
my ($url, $title, $description);
$url = $http.$$protofeed{$recno}{'url'};
$url = uri_escape($url, "?'\"");
$title = $$protofeed{$recno}{'title'};
$title = encode_entities_numeric($title, '&<');
my $updated = &updated($$protofeed{$recno}{'date.created'},
$$protofeed{$recno}{'date.modified'});
if ($updated) {
$title .= ' (updated)';
}
$description = $$protofeed{$recno}{'description'};
$description = encode_entities_numeric($description, '&<');
if ( $opt{'b'} && defined($${$bodies}{$recno}{'body'} ) ) {
$feed->add_item(
link => $url,
title => $title,
description => qq(<p classs="description">)
.$description.qq(</p>\n\n)
.$${$bodies}{$recno}{'body'},
pubDate => $pubDate,
);
} else {
$feed->add_item(
link => $url,
title => $title,
description => $description,
pubDate => $pubDate,
);
}
}
}
return($feed->as_string);
}
sub make_gemini_atom_feed {
my (%protofeed) = @_;
# make xml/atom feed for use over Gemini protocol
# see https://validator.w3.org/feed/docs/error/InvalidRFC2822Date.html
# see https://www.rfc-editor.org/rfc/rfc4287.html
my $dt = DateTime->now(time_zone=>'UTC');
my $feed = XML::Feed->new('Atom');
$feed->title('Techrights');
$feed->link('gemini://gemini.techrights.org/');
$feed->self_link('gemini://gemini.techrights.org/feed.xml');
$feed->base('gemini://gemini.techrights.org/');
$feed->id('gemini://gemini.techrights.org/');
$feed->tagline('bonum certa men certa');
$feed->language('en');
$feed->modified($dt);
my $gemini = 'gemini://gemini.techrights.org/'; # hardcoded :(
# add entries for each individual post in this feed
# sorted in a special sequence, floating recently updated posts to the top
my $updated = 0;
for my $recno (sort {
&by_updated($protofeed{$b}{'date.created'},
$protofeed{$b}{'date.modified'},
$protofeed{$a}{'date.created'},
$protofeed{$a}{'date.modified'})
or $protofeed{$b}{'date.modified'}
cmp $protofeed{$a}{'date.modified'}
or $protofeed{$b}{'date.created'}
cmp $protofeed{$a}{'date.created'}
or $b cmp $a
} keys %protofeed) {
if (defined($protofeed{$recno}{'url'})) {
my $entry = XML::Feed::Entry->new();
my $url = $gemini.$protofeed{$recno}{'url'};
# URL paths ought to map 1:1 from http to gemini
$url =~ s/\.shtml$/.gmi/;
$entry->id($url);
$entry->link($url);
$updated = &updated($protofeed{$recno}{'date.created'},
$protofeed{$recno}{'date.modified'});
if ($updated && $opt{'u'}) {
$entry->title($protofeed{$recno}{'title'}.' (updated)');
} else {
$entry->title($protofeed{$recno}{'title'});
}
$entry->author($protofeed{$recno}{'author'});
if ( my ($y, $m, $d) = ($protofeed{$recno}{'date.modified'}
=~ m/^([0-9]{4})-([0-9]{2})-([0-9]{2})/)) {
my $date = DateTime->new(year=>$y, month=>$m, day=>$d);
$entry->modified($date);
}
$entry->summary($protofeed{$recno}{'description'});
$feed->add_entry($entry);
}
}
# kludge for XML::Feed's hardcoded MIME Types
# this is brittle
my $f = $feed->as_xml;
$f =~ s|^(\s*<link [^>]+) (type="text/html")|$1 type="text/gemini"|gm;
return($f);
}
sub make_xhtml_feed {
my (%protofeed) = @_;
# make XHTML document fragment listing posts in special sequence
my $feed = '';
$feed = qq(<div class="latest">\n);
$feed .= "<dl>\n";
my $count = 0;
my $old_updated = 0;
my $updated = 0;
for my $recno (sort {
&by_updated($protofeed{$b}{'date.created'},
$protofeed{$b}{'date.modified'},
$protofeed{$a}{'date.created'},
$protofeed{$a}{'date.modified'})
or $protofeed{$b}{'date.modified'}
cmp $protofeed{$a}{'date.modified'}
or $protofeed{$b}{'date.created'}
cmp $protofeed{$a}{'date.created'}
or $b cmp $a
} keys %protofeed) {
if (defined($protofeed{$recno}{'url'})) {
if ($opt{'u'}) {
$updated = &updated($protofeed{$recno}{'date.created'},
$protofeed{$recno}{'date.modified'});
if ($old_updated && !$updated) {
$feed .= "\n <dd> </dd>\n\n";
}
$old_updated = $updated;
}
my $url = uri_escape($protofeed{$recno}{'url'},"?\"");
my $title = encode_entities_numeric($protofeed{$recno}{'title'},
'&<');
my $description =
encode_entities_numeric($protofeed{$recno}{'description'},
'&<');
if ($updated) {
$feed .= ' <dt class="updated"><a href="'.$url.'">'
.$title.'</a></dt>'."\n";
$feed .= ' <dd class="updated">'
.$description."</dd>\n";
} else {
$feed .= ' <dt><a href="'.$url.'">'
.$title.'</a></dt>'."\n";
$feed .= ' <dd>'.$description."</dd>\n";
}
$count++;
}
}
$feed .= "</dl>\n";
$feed .= "</div>\n";
if ($count) {
return($feed);
} else {
return(0);
}
}
sub make_gemtext_feed {
my (%protofeed) = @_;
# make GemText document fragment listing links in special sequence
my $feed = '';
$feed = qq(\n);
my $count = 0;
my $old_updated = 0;
my $updated = 0;
for my $recno (sort {
&by_updated($protofeed{$b}{'date.created'},
$protofeed{$b}{'date.modified'},
$protofeed{$a}{'date.created'},
$protofeed{$a}{'date.modified'})
or $protofeed{$b}{'date.modified'}
cmp $protofeed{$a}{'date.modified'}
or $protofeed{$b}{'date.created'}
cmp $protofeed{$a}{'date.created'}
or $b cmp $a
} keys %protofeed) {
if (defined($protofeed{$recno}{'url'})) {
$updated = &updated($protofeed{$recno}{'date.created'},
$protofeed{$recno}{'date.modified'},);
if ($old_updated && !$updated) {
$feed .= "\n";
}
$old_updated = $updated;
$count++;
my $url = uri_escape($protofeed{$recno}{'url'},"?\"");
$url =~ s/\.\w+$/.gmi/;
my $title = $protofeed{$recno}{'title'};
my $description = $protofeed{$recno}{'description'};
if ($updated) {
$feed .= "=>\t".$url."\t".$title." (update)\n";
} else {
$feed .= "=>\t".$url."\t".$title."\n";
}
$feed .= ' '.$description."\n\n";
}
}
$feed .= "\n";
if ($count) {
return($feed);
} else {
return(0);
}
}
sub by_updated {
my ($cdate1, $mdate1, $cdate2, $mdate2) = @_;
my $updated1 = &updated($cdate1, $mdate1);
my $updated2 = &updated($cdate2, $mdate2);
return( $updated1 cmp $updated2);
}
sub updated {
my ($date1, $date2) = @_;
# check if the modification is at least 30 minutes ago
# or at least 30 minutes since record creation
my ($year1,$month1,$day1, $hour1,$min1,undef) =
($date1
=~ m/^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2})/);
my ($year2,$month2,$day2, $hour2,$min2,undef) =
($date2
=~ m/^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2})/);
my ($year3,$month3,$day3, $hour3,$min3,undef) = Today_and_Now(1);
# calculate the time between creation and update
my ($Dd,$Dh,$Dm,$Ds) = Delta_DHMS($year1,$month1,$day1, $hour1,$min1,00,
$year2,$month2,$day2, $hour2,$min2,00);
# has the record been updated?
if ($Dd || $Dh || $Dm) {
# calculate the time since the update in days, hours, minutes, seconds
my ($Dd,$Dh,$Dm,$Ds) = Delta_DHMS($year2,$month2,$day2,
$hour2,$min2,00,
$year3,$month3,$day3,
$hour3,$min3,00);
# if less than one day has passed but at least 30 minutes since editing
if ($Dd < 1 && ($Dh >= 1 || $Dm >= 30)) {
return(1);
}
}
return(0);
}
Generator/tr-add-and-refresh-from-db.sh
#!/bin/sh # 2022-07-26 PATH=/usr/local/bin:/usr/bin:/bin conf='/etc/tr-initialize-static-site-generator.conf' case $USER in 'tuxmachines') author='Tux Machines' ;; 'roy') author='Roy Schestowitz' ;; 'rianne') author='Rianne Schestowitz' ;; 'marius') author='Marius Nestor' ;; 'arindam') author='Arindam Giri' ;; 'trendoceans') author='Arctic' ;; *) author=$USER ;; esac # add a record tr-add-entry-sql.pl -c $conf -a "$author" # update both the XHTML and Gemtext hierarchies tr-refresh-site-from-db.sh exit 0
Generator/tr-initialize-static-site-generator.pl
#!/usr/bin/perl
use Getopt::Long;
use File::Path qw(make_path);
use DBI qw(:sql_types);
use Config::Tiny;
use strict;
use warnings;
our $VERBOSE = 0;
my %opt;
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$| );
GetOptions ("config|c=s" => \$opt{'c'},
"documentroot|r=s" => \$opt{'r'},
"serverroot|s=s" => \$opt{'s'},
"geminiroot|g=s" => \$opt{'g'},
"verbose+" => \$opt{'v'},
"help" => \$opt{'h'},
);
if ($opt{'h'}) {
my $err = 0;
usage($script, 'sample.conf', $err);
}
my $config = $opt{'c'};
if (! -f $config) {
my $err = 0;
&usage($script, $config, $err);
} elsif (! -r $config) {
die("Configuration file '$config' is not readable\n");
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configurationn file '$config': $!\n");
my $documentroot = $configuration->{webserver}->{documentroot}
or die("DocumentRoot missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
my $gemtextroot = $configuration->{gemini}->{geminiroot}
or die("GeminitRoot missing from configuration file\n");
# run time options take precedence over config file
if ($opt{'r'}) {
$documentroot = $opt{'r'};
}
if ($opt{'s'}) {
$serverroot = $opt{'s'};
}
if ($opt{'g'}) {
$gemtextroot = $opt{'g'};
}
# make sure there are leading and trailing slashes on the paths
$documentroot =~ s|(?<=[^/])$|/|;
$documentroot =~ s|//+$|/|;
$serverroot =~ s|(?<=[^/])$|/|;
$serverroot =~ s|//+$|/|;
$gemtextroot =~ s|(?<=[^/])$|/|;
$gemtextroot =~ s|//+$|/|;
print qq(server root $serverroot\n);
print qq(document root $documentroot\n);
print qq(geminit root $gemtextroot\n);
&make_webserver_paths($serverroot);
&make_db($serverroot);
&make_draft_tables($serverroot);
&make_gemtext_paths($gemtextroot);
&make_gemtext_template($gemtextroot);
&make_html_header($documentroot);
&make_html_footer($documentroot);
&make_html_navigation($documentroot);
&touch_html_feed($documentroot);
print qq(success\n);
exit(0);
sub usage {
my ($script, $config, $error) = @_;
print "USAGE\n\n";
print "$script -c CONFIG\n";
print " [-u url] [--preload text] [--skip-date] [--skip-slug]\n";
print " -a author aka dc.creator\n";
print " -c path to configuration file\n";
print " -d date in YYYYMMDD or YYYY-MM-DD format\n";
print " -m is the brief description for search engines to use";
print " -s the unique part of the file name\n";
print " -t the title to be used in the HTML document\n";
print " -u graphic URL to pre-fetch\n";
print " -v show debugging info\n";
print "\n";
print " --preload prepend text into document body\n";
print " --skip-date don't query about datetime\n";
print " --skip-slug skip slug query\n";
print "\n";
print " -h show this message\n";
print "\n";
print "The others will be prompted for if missing.\n";
if ($config eq 'sample.conf') {
print "Provide a configuration file, ";
} else {
print "Looking for config file in '$config',\n";
}
print <<EOC;
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
sub make_webserver_paths {
my ($serverroot) = @_;
my $dbpath = $serverroot . 'db/';
if ( ! -e $serverroot ) {
make_path($dbpath,{mode=>0775})
or die("Could not create database root '$dbpath' : $!\n");
if ($VERBOSE) {
print "Created directory '$dbpath'\n";
}
} elsif ( -w $serverroot ) {
if ( ! -e $dbpath ) {
make_path($dbpath,{mode=>0775})
or die("Could not create database path '$dbpath' : $!\n");
if ($VERBOSE) {
print "Created directory '$dbpath'\n";
}
}
} else {
die("Could not create server root '$serverroot' is not writable\n");
}
my $htdocs = $serverroot . 'htdocs/';
if ( ! -e $htdocs ) {
make_path($htdocs,{mode=>0775})
or die("Could not create database path '$htdocs' : $!\n");
if ($VERBOSE) {
print "Created directory '$htdocs'\n";
}
}
my $drafts = $htdocs . 'drafts/';
if ( ! -e $drafts ) {
make_path($drafts,{mode=>0775})
or die("Could not create database path '$drafts' : $!\n");
if ($VERBOSE) {
print "Created directory '$drafts'\n";
}
}
my $posts = $htdocs.'n/';
if ( ! -e $posts ) {
make_path($posts,{mode=>0775})
or die("Could not create database path '$posts' : $!\n");
if ($VERBOSE) {
print "Created directory '$posts'\n";
}
}
return(1);
}
sub make_db {
my ($serverroot, $file) = @_;
my $dbpath = $serverroot.'db/';
my $dbfile;
# post database
if ($file) {
$file = s/\.sqlite3?$//;
$dbfile = $dbpath.$file.'.sqlite3';
} else {
$dbfile = $dbpath.'tr-static-site-generator.sqlite3';
}
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my @queries = (
qq(
/* key index */
CREATE TABLE IF NOT EXISTS "keys" (
recno integer not null primary key,
written integer default 0 not null,
date varchar(8) not null,
ballast integer,
slug varchar(256) not null,
unique (date, slug, ballast));
/* all old_ tables are only filled manually ... one-off */
),
qq(CREATE TABLE IF NOT EXISTS "old_keys" (
recno integer not null primary key,
file varchar(256) not null);
/* metadata */
),
qq(CREATE TABLE IF NOT EXISTS metadata(
recno integer,
term varchar(25) not null,
value varchar(256) not null,
constraint fk_recno foreign key (recno)
references "keys" (recno));
),
qq(
/* body */
CREATE TABLE IF NOT EXISTS "body"(
recno integer primary key,
body text not null,
foreign key (recno)
references "keys" (recno));
),
qq(CREATE TABLE IF NOT EXISTS "rawtext_body"(
recno integer primary key unique,
fulltext text not null,
foreign key (recno)
references "keys" (recno));
),
qq(CREATE VIRTUAL TABLE IF NOT EXISTS "fts5_body" USING FTS5(
fulltext,
content=rawtext_body,
content_rowid=recno);
),
qq(
/* FTS body triggers */
CREATE TRIGGER IF NOT EXISTS rawtext_insert_body
AFTER INSERT ON rawtext_body BEGIN
INSERT INTO fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_update_body
AFTER UPDATE ON rawtext_body BEGIN
INSERT INTO fts5_body(fts5_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_delete_body
AFTER DELETE ON rawtext_body BEGIN
INSERT INTO fts5_body(fts5_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
),
qq(
/* old body is only raw text in the db */
CREATE TABLE IF NOT EXISTS "old_rawtext_body"(
recno integer primary key unique,
fulltext text not null);
),
qq(CREATE VIRTUAL TABLE IF NOT EXISTS "old_fts5_body"
USING FTS5(
fulltext,
content=old_rawtext_body,
content_rowid=recno)
),
qq(CREATE TRIGGER IF NOT EXISTS old_rawtext_insert_body
AFTER INSERT ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(CREATE TRIGGER IF NOT EXISTS old_rawtext_update_body
AFTER UPDATE ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(old_fts5_body,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(CREATE TRIGGER IF NOT EXISTS old_rawtext_delete_body
AFTER DELETE ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(old_fts_body,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
),
qq(
/* comments are only in the old posts and only raw text in the db*/
CREATE TABLE IF NOT EXISTS "old_rawtext_comments"(
recno integer primary key unique,
fulltext text not null);
),
qq(CREATE VIRTUAL TABLE IF NOT EXISTS "old_fts5_comments"
USING FTS5(
fulltext,
content=old_rawtext_comments,
content_rowid=recno)
),
qq(CREATE TRIGGER IF NOT EXISTS old_rawtext_insert_comments
AFTER INSERT ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(CREATE TRIGGER IF NOT EXISTS old_rawtext_update_comments
AFTER UPDATE ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(old_fts5_comments,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_comments(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(CREATE TRIGGER IF NOT EXISTS old_rawtext_delete_comments
AFTER DELETE ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(old_fts5_comments,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
),
qq(
/* metadata FTS */
CREATE TABLE IF NOT EXISTS "rawtext_metadata"(
recno integer primary key unique,
fulltext text not null,
foreign key (recno)
references "keys" (recno));
),
qq(
CREATE VIRTUAL TABLE IF NOT EXISTS "fts5_metadata" USING FTS5(
fulltext,
content=rawtext_metadata,
content_rowid=recno)
),
qq(
CREATE TRIGGER IF NOT EXISTS rawtext_insert_metadata
AFTER INSERT ON rawtext_metadata BEGIN
INSERT INTO fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(
CREATE TRIGGER IF NOT EXISTS rawtext_update_metadata
AFTER UPDATE ON rawtext_metadata BEGIN
INSERT INTO fts5_metadata(fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(
CREATE TRIGGER IF NOT EXISTS rawtext_delete_metadata
AFTER DELETE ON rawtext_metadata BEGIN
INSERT INTO fts5_metadata(fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
),
qq(
/* old metadata plus FTS */
CREATE TABLE IF NOT EXISTS "old_rawtext_metadata"(
recno integer primary key unique,
fulltext text not null,
foreign key (recno)
references "keys" (recno));
),
qq(
CREATE TABLE IF NOT EXISTS "old_metadata"(
recno integer,
term varchar(25) not null,
value varchar(256) not null);
),
qq(
CREATE VIRTUAL TABLE IF NOT EXISTS "old_fts5_metadata"
USING FTS5(
fulltext,
content=old_rawtext_metadata,
content_rowid=recno)
),
qq(
CREATE TRIGGER IF NOT EXISTS old_rawtext_insert_metadata
AFTER INSERT ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(
CREATE TRIGGER IF NOT EXISTS old_rawtext_update_metadata
AFTER UPDATE ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(old_fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
),
qq(
CREATE TRIGGER IF NOT EXISTS old_rawtext_delete_metadata
AFTER DELETE ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(old_fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
)
);
my $sth;
foreach my $query (@queries) {
if ($VERBOSE > 1) {
print qq($query\n\n);
}
eval {
$sth = $dbh->prepare($query)
};
if ($@) {
print STDERR qq(\n),$@,qq(\n);
die("prepare statement failed: $dbh->errstr()\n$query\n");
}
eval {
$sth->execute()
};
if ($@) {
print STDERR qq(\n),$@,qq(\n);
die("prepare statement failed: $dbh->errstr()\n$query\n");
}
$sth->finish;
}
$dbh->commit;
$dbh->disconnect;
# image database
if ($file) {
$dbfile = $dbpath.$file.'.img.sqlite3';
} else {
$dbfile = $dbpath.'tr-static-site-generator-img.sqlite3';
}
$dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
@queries = (
qq(CREATE TABLE IF NOT EXISTS "images" (
sha256 varchar(64) unique not null,
epoch integer not null,
image varchar(256) not null)),
qq(CREATE UNIQUE INDEX IF NOT EXISTS fingerprint on images (sha256)),
);
foreach my $query (@queries) {
if ($VERBOSE > 1) {
print qq($query\n\n);
}
$sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute
or die("execute statement failed: $dbh->errstr()\n");
}
$dbh->commit;
$sth->finish;
$dbh->disconnect;
return(1);
}
sub make_draft_tables {
my ($serverroot, $file) = @_;
my $dbpath = $serverroot.'db/';
my $dbfile;
# draft database
if ($file) {
$file = s/\.sqlite3?$//;
$dbfile = $dbpath.$file.'.sqlite3';
} else {
$dbfile = $dbpath.'tr-static-site-generator.sqlite3';
}
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my @queries = (
qq(CREATE TABLE IF NOT EXISTS "draft_keys" (
recno integer not null primary key,
written integer default 0 not null,
date varchar(8) not null,
ballast integer,
slug varchar(256) not null,
unique (date, slug, ballast)) ),
qq(CREATE TABLE IF NOT EXISTS "draft_metadata"(
recno integer,
term varchar(25) not null,
value varchar(256) not null,
constraint fk_recno foreign key (recno)
references "draft_keys" (recno)) ),
qq(CREATE TABLE IF NOT EXISTS "draft_body"(
recno integer primary key,
body text not null,
foreign key (recno)
references "draft_keys" (recno)) ),
qq(CREATE TABLE IF NOT EXISTS "draft_rawtext"(
recno integer primary key unique,
fulltext text not null,
foreign key (recno)
references "keys" (recno)) )
);
my $sth;
foreach my $query (@queries) {
if ($VERBOSE > 1) {
print qq($query\n\n);
}
$sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute
or die("execute statement failed: $dbh->errstr()\n");
$dbh->commit;
}
$sth->finish;
$dbh->disconnect;
return(1);
}
sub make_gemtext_paths {
my ($gemtextroot) = @_;
if ( ! -e $gemtextroot ) {
make_path($gemtextroot,{mode=>0775})
or die("Could not create gemtext root path '$gemtextroot' : $!\n");
if ($VERBOSE) {
print "Created directory '$gemtextroot'\n";
}
} elsif ( ! -w $gemtextroot ) {
die("Path '$gemtextroot' is not writable\n");
}
my $gpath = $gemtextroot . 'n/';
if ( ! -e $gpath) {
make_path($gpath,{mode=>0775})
or die("Could not create gemtext path '$gpath' : $!\n");
}
my $dpath = $gemtextroot . 'drafts/';
if ( ! -e $dpath) {
make_path($dpath,{mode=>0775})
or die("Could not create gemtext drafts path '$dpath' : $!\n");
}
return(1);
}
sub make_gemtext_template {
my ($gemtextroot) = @_;
my $template = <<EOG;
Welcome to Techrights, the Gemini Capsule!
# Overview
=> /intro/ Introduction
=> /about/ About this capsule
=> /archives.gmi Capsule archives
=> /irc.gmi Contact us (IRC)
# Articles from Techrights (GemText)
## Latest Articles in Techrights
EOG
# write the template
my $gemtext = $gemtextroot.'index.template';
open(my $g, '>', $gemtext)
or die("Could not write '$gemtext' \n");
print $g $template;
close($g);
# touch the hitclock
$gemtext = $gemtextroot.'hitclock';
open($g, '>>', $gemtext)
or die("Could not write '$gemtext' \n");
print $g "";
close($g);
return(1);
}
sub make_html_footer {
my ($documentroot) = @_;
my $footer = <<EOF;
<div class="footer">
<ul>
<li><a href="/index.shtml">Home</a></li>
<li><a href="/about.shtml">About</a></li>
<li><a href="/irc.shtml">IRC</a></li>
<!--
<li><a href="/search.html">Search</a></li>
-->
<li><a href="/feed.xml">Feed</a></li>
</ul>
</div>
EOF
my $file = $documentroot.'footer.html';
open(my $f, '>', $file)
or die("Could not write '$file' \n");
print $f $footer;
close($f);
return(1);
}
sub make_html_header {
my ($documentroot) = @_;
my $header = <<EOF;
<div class="header">
<img src="/i/techrights.logo.png" width="128" height="96" alt=""/>
<div>
<h1>Techrights</h1>
<p>bonum certa men certa</p>
</div>
</div>
EOF
my $file = $documentroot.'header.html';
open(my $h, '>', $file)
or die("Could not write '$file' \n");
print $h $header;
close($h);
return(1);
}
sub make_html_navigation{
my ($documentroot) = @_;
my $navmenu = <<EOF;
<div class="navigation">
<ul>
<li><a href="/index.shtml">Home</a></li>
<li><a href="/about.shtml">About</a></li>
<li><a href="/irc.shtml">IRC</a></li>
<li><a href="gemini://gemini.techrights.org/">Gemini Edition</a></li>
<!--
<li><a href="/search.html">Search</a></li>
-->
<li><a href="/feed.xml">Feed</a></li>
</ul>
</div>
EOF
my $file = $documentroot.'navigation.html';
open(my $n, '>', $file)
or die("Could not write '$file' \n");
print $n $navmenu;
close($n);
return(1);
}
sub touch_html_feed {
my ($documentroot) = @_;
# touch placeholder for html version of feeds
my $file = $documentroot.'feeds.html';
open(my $n, '>', $file)
or die("Could not write '$file' \n");
print $n "";
close($n);
return(1);
}
Generator/tr-rss-since-scraper.sh
#!/bin/sh
# 2022-07-07
PATH=/usr/local/bin:/usr/bin:/bin
closure() {
test -d ${tmpdir} || exit 1
echo "Erasing temporary directory (${tmpdir}) and its files."
rm -f ${tmpdir}/feed-tmp.*
rmdir ${tmpdir}
}
cancel() {
echo "Cancelled."
closure
exit 2
}
# trap various signals to be able to erase temporary files
trap "cancel" 1 2 15
start=$(date -d '-2 days' +'%F')
file="/var/www/techrights.org/htdocs/feeds.html"
umask 0002
echo '<div class="feedlist">' > $file
echo -e "<h2>Other Sites</h2>\n\n" >> $file
# set up a temporary directory for many temporary files
umask 0077
tmpdir=$(mktemp -d /tmp/feeds-tmp.XXXXXX)
# fetch feeds concurrently, each to a unique temporary file
while read feed; do
tmpfile=$(mktemp -p ${tmpdir} feed-tmp.XXXXXXX)
# use -o option because of permission problems with stdout and su
tr-rss-since-scraper.pl -L -t -d $start -o ${tmpfile} ${feed} &
done <<EOF
$(grep -E -v '^#|^$' /usr/local/bin/rss-since-scraper.config)
EOF
wait
# concatenate all the temporary feed files into the destination file
cat ${tmpdir}/feed-tmp.* >> $file
echo '</div>' >> $file
chmod u=rw,g=rw,o=r $file
# clear signal trapping
trap - 1 2 15
# remove temporary files
closure
exit 0
Generator/tr-add-entry-sql.pl
#!/usr/bin/perl
use utf8;
use Getopt::Long;
use URI;
use File::Temp qw(tempfile);
use File::Path qw(make_path);
use Unicode::Normalize qw(NFKD);
use HTML::TreeBuilder::XPath;
use HTML::FormatText;
use HTML::Entities;
use DBI qw(:sql_types);
use Term::ANSIColor;
use Capture::Tiny qw(capture capture_stdout);
use Date::Calc qw(Today Today_and_Now Delta_Days);
use Term::ANSIColor qw(:constants);
use HTML::Entities;
use Config::Tiny;
use English;
use strict;
use warnings;
use open qw(:std :encoding(UTF-8));
# https://www.ietf.org/rfc/rfc2731.txt
if ($ENV{'USER'} eq 'root' && ! $ENV{'EUID'}) {
print STDERR qq(Cannot run as root!\nAborting\n);
exit(1);
}
local $OUTPUT_AUTOFLUSH=1;
our $VERBOSE = 0;
my ($author,
$config,
$date,
$description,
$help,
$preload,
$subject,
$skipdate,
$skipslug,
$slug,
$title,
$urls ) = ('') x 12;
GetOptions ("author|a=s" => \$author,
"config|c=s" => \$config,
"date|d=s" => \$date,
"description|m=s" => \$description,
"help|h" => \$help,
"preload=s" => \$preload,
"slug=s" => \$slug,
"subject|s=s" => \$subject,
"skip-date" => \$skipdate,
"skip-slug" => \$skipslug,
"title|t=s" => \$title,
"url|u=s@" => \$urls,
"verbose+" => \$VERBOSE,
);
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$|);
if (! $config) {
warn("Provide configuration file via the -c option.\n");
my $err = 1;
usage($script, 'sample.conf', $err);
}
if ($help) {
my $err = 0;
usage($script, $config, $err);
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configuration file '$config': $!\n");
my $dbname = $configuration->{database}->{name}
or die("Database name missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
$author = get_author($author); # get option or default to blank
$date = get_date($date); # get option or default to current date
$title = get_title($title); # get option or default to blank
$description = get_desc($description); # get option
$slug = get_slug($slug, $title); # calculate slug
my $dir = '';
my $dest = '';
my $done = 0;
my $checked = 0;
my $dbfile = $serverroot . '/db/' . $dbname;
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1,
on_connect_do => "PRAGMA foreign_keys = ON",
})
or die("Could not open database '$dbfile': $!\n");
local $SIG{INT} = sub { done($dbh) }; # quit gracefully
local $SIG{HUP} = sub { done($dbh) };
local $SIG{TERM} = sub { done($dbh) };
local $SIG{STOP} = sub { done($dbh) };
my $editor = File::Temp->new( TEMPLATE => 'temp.XXXXX',
DIR => '/tmp',
SUFFIX => '.body2.tmp',
UNLINK => 1 );
my $tmpfile = $editor->filename;
-f $tmpfile && unlink($tmpfile); # clear the way for nano
my ($img, $result) = (''x2);
if ($urls) {
foreach my $u (@{$urls}) {
my $url = URI->new($u)
or die("Could not parse URL\n");
my @cmd = ('tr-scale-and-process-image.pl', $url->canonical);
system(@cmd) == 0
or die("fetching '@cmd' failed: $?\n");
my ($i, $result) = capture_stdout {system(@cmd)};
$img = $img . "\n" . $i;
}
if ($VERBOSE > 1) {
print qq(\n$img\n\n);
}
}
my $body = '';
while (!$done) {
print qq(\nMetadata:\n);
if ($skipdate) {
my @todaynow = Today_and_Now;
@todaynow = splice( @todaynow, 0, 5);
$date = sprintf("%04d-%02d-%02dT%02d:%02d", @todaynow);
undef($skipdate);
} elsif (!$date) {
$date = read_date($date);
}
$dir = $date;
$dir =~ s|^([0-9]{4})([0-9]{2})([0-9]{2})|$1/$2/$3|;
$author = read_author($author);
$title = read_title($title);
check_title($dbh, $title);
$description = read_description($description);
if (! $checked++ && !$slug && $title) {
$slug = $title;
$slug =~ s/\s+/_/g;
$slug =~ s/[[:punct:]]+/_/g; # harmonize with gemini
$slug =~ s/_+$//g;
$slug =~ s/^_+//;
$slug =~ s/_+$//;
$slug =~ s|/||g;
$slug =~ s/[^\w+-:'"?!]+//g;
# swap out diactricicals, gemini clients choke on them
$slug = NFKD($slug);
$slug =~ s/\p{NonspacingMark}//g;
if ($slug ne substr($slug,0,63)) {
print STDERR color('bold white');
print STDERR qq(Slug is too long. );
print STDERR qq(It should be less than 63 characters.\n);
print STDERR color('reset');
$checked = 0;
$slug = substr($slug,0,63);
} elsif (!$slug) {
print STDERR color('bold white');
print STDERR qq(Invalid title-based slug, );
print STDERR qq(check title or add slug\n);
print STDERR color('reset');
exit(1);
}
}
if (!$skipslug) {
$slug = read_slug($slug);
}
print "A=$author\n" if ($VERBOSE);
print "D=$date\n" if ($VERBOSE);
print "T=$title\n" if ($VERBOSE);
print "M=$description\n" if ($VERBOSE);
print "S=$slug\n" if ($VERBOSE);
print qq(\n Metadata OK ? [y/N] );
my $i = lc <STDIN>;
chomp $i;
if ($i ne 'y') {
next;
}
print "Waiting for database to unlock ...";
my $draft = 1;
my ($draft_recno, $ballast)
= get_next_available_recno($dbh, $date, $slug, $draft);
print "lock acquired\n";
if (!$draft_recno) {
$done = 0;
$checked = 0;
next;
}
my $status;
if (!$slug) {
die("Slug missing"); # kludge for debugging
}
$status = write_draft_keys($dbh, $draft_recno, $date, $slug, $ballast);
if (!$status) {
next;
}
if($status) {
$status = write_draft_metadata($dbh, $draft_recno, $title,
$author, $date, $description);
}
if ($status != 1) {
next;
}
$draft = 0;
my $notyet = 0;
if ($status) {
if ($body) {
$preload = $body;
}
$body = edit_body($preload, $tmpfile, $img);
my $i;
while(1) {
print qq(\n Body OK? [Y/n/q/d] );
$i = lc <STDIN>;
chomp $i;
if($i eq 'y' or $i eq 'd' or $i eq 'n' or $i eq 'q'){
last;
} elsif ($i eq '') {
$i = 'y';
last;
} else {
print qq(Yes, No, Quit, or Draft\n);
}
}
if ($i eq 'y') {
$done++;
} elsif ($i eq 'd') {
$done++;
$draft++;
} elsif ($i eq 'n') {
$notyet = 1;
$done++;
}
if ($i eq 'q') {
print "Rolling back and quitting\n";
$dbh->rollback;
exit(0);
} else {
$status = write_draft_body($dbh, $draft_recno, $body);
}
}
my $recno;
if ($status && $done && !$draft) {
($recno, $ballast) = get_next_available_recno($dbh, $date,
$slug, $draft);
$status = write_nondraft($dbh, $draft_recno,
$recno, $ballast, $body,
$title, $description);
}
if ($notyet) {
$done = 0;
} elsif ($status && $draft) {
print qq($draft_recno added as draft\n);
$done++;
} elsif ($status && $done) {
print qq(Record added ( $recno )\n);
$done++;
} elsif (!$notyet && $done) {
print "Rolling back\n";
$dbh->rollback;
} else {
$done = 0;
}
if (!$done) {
print "Not done yet. Rolling back\n";
$dbh->rollback;
}
}
if ($VERBOSE) {
print qq(Writing changes\n);
}
$dbh->commit;
$dbh->disconnect;
close($editor);
exit(0);
sub usage {
my ($script, $config, $error) = @_;
print "USAGE\n\n";
print "$script -c CONFIG [-hv] [-a AUTHOR] [-d DATE] [-s SLUG] [-t TITLE]";
print " [-u url] [--preload text] [--skip-date] [--skip-slug]\n";
print " -a author aka dc.creator\n";
print " -c path to configuration file\n";
print " -d date in YYYYMMDD or YYYY-MM-DD format\n";
print " -m is the brief description for search engines to use";
print " -s the unique part of the file name\n";
print " -t the title to be used in the HTML document\n";
print " -u graphic URL to pre-fetch\n";
print " -v show debugging info\n";
print "\n";
print " --preload prepend text into document body\n";
print " --skip-date don't query about datetime\n";
print " --skip-slug skip slug query\n";
print "\n";
print " -h show this message\n";
print "\n";
print "The others will be prompted for if missing.\n";
if ($config eq 'sample.conf') {
print "Provide a configuration file, ";
} else {
print "Looking for config file in '$config',\n";
}
print <<EOC;
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
sub get_author {
my ($author) = @_;
# lll - validation / lookup table?
return($author);
}
sub get_date {
my ($date) = @_;
if ($date) {
$date = iso_8601_date($date);
if (!$date) {
print STDERR color('bold white');
print STDERR qq(Invalid date : $date\n);
print STDERR color('reset');
exit(1);
}
}
if (!$date) {
my ($second,$minute,$hour,$day,$month,$year) = gmtime();
$year = sprintf("%04d", $year + 1900);
$month = sprintf("%02d", $month + 1);
$day = sprintf("%02d", $day);
$hour = sprintf("%02d", $hour);
$minute = sprintf("%02d", $minute);
$date = qq($year-$month-$day).'T'.qq($hour:$minute);
}
print qq(Date = $date\n) if $VERBOSE;
return($date);
}
sub get_title {
my ($title) = @_;
if ($title) {
$title =~ s/^\s+//;
$title =~ s/\s+$//;
}
return($title);
}
sub get_desc {
my ($description) = @_;
if ($description) {
$description =~ s/^\s+//;
$description =~ s/\s+$//;
}
return($description);
}
sub get_slug {
my ($slug, $title) = @_;
print qq(1: $slug / $title\n) if ($VERBOSE);
# the \w does not handle unicode properly, no clue why
if ($slug) {
$slug =~ s/\s+$//;
$slug =~ s/^\s+//;
$slug =~ s/\s+/_/g;
$slug =~ s/[[:punct:]]+/_/g; # harmonize with gemini
$slug =~ s/_+$//;
$slug =~ s/__+/_/gm;
$slug =~ s/[^\w\+\-\:\[\]\{\}\\?\!\@\#\&\*\$\%]+//g;
# swap out diactricicals, gemini clients choke on them
$slug = NFKD($slug);
$slug =~ s/\p{NonspacingMark}//g;
$slug = substr($slug,0,63);
if (!$slug) {
print color('bold white');
print STDERR qq(Invalid slug '$slug'\n);
print color('reset');
exit(1);
}
}
if (!$slug && $title) {
$slug = $title;
$slug =~ s/\s+$//;
$slug =~ s/^\s+//;
$slug =~ s/\s+/_/g;
$slug =~ s|/+|_|g;
$slug =~ s/[[:punct:]]+/_/g; # harmonize with gemini
$slug =~ s/_+$//;
while ($slug =~ s/__+/_/g) { 1 }
$slug =~ s/[^\w\+\-\:\[\]\{\}\?\!\@\#\&\*\$\%]+//g;
if (!$slug) {
print color('bold white');
print STDERR qq(Invalid title-based slug, );
print STDERR qq(check title or add slug\n);
print color('reset');
exit(1);
}
}
print qq(2: $slug / $title\n) if ($VERBOSE);
return($slug);
}
sub read_author {
my ($author) = @_;
my $done = 0;
while (!$done) {
print " Author: ";
if ($author) {
print "[$author] ";
if($author) {
print "\n";
}
}
my $new_author = '';
if (!$author) {
$new_author = <>;
chomp($new_author);
}
if($new_author) {
$author = $new_author;
}
# lll - lookup table or validation ?
$author =~ tr/\x00-\x08\x0a-\x1f/ /ds;
$author =~ tr/\x09/ /s;
if ($author) {
$done++;
} else {
print color('bold white');
print STDERR qq(Add author name or handle\n);
print color('reset');
}
}
return($author);
}
sub read_date {
my ($date) = @_;
my $done = 0;
while (!$done) {
print qq( Date: );
if ($date) {
print qq([$date] );
}
my $d = <>;
chomp($d);
$d =~ tr/\x00-\x08\x0a-\x1f/ /ds;
$d =~ tr/\x09/ /s;
if ($d) {
($date) = ($d =~ m/^([0-9]{4}-[0-9]{2}-[0-9]{2})$/)
or
($date) = ($d =~ m/^([0-9]{4}[0-9]{2}[0-9]{2})$/);
if (!$date) {
print color('bold white');
print STDERR qq(Invalid date '), $d, qq('\n);
print color('reset');
} else {
$date =~ s/-//g;
$done++;
}
} elsif($date) {
$done++;
} else {
my ($second,$minute,$hour,$day,$month,$year) = gmtime();
$year = sprintf("%04d", $year + 1900);
$month = sprintf("%02d", $month + 1);
$day = sprintf("%02d", $day);
$hour = sprintf("%02d", $hour);
$minute = sprintf("%02d", $minute);
$date = qq($year-$month-$day).qq(T$hour:$minute);
}
}
return($date);
}
sub read_title {
my ($title) = @_;
my $done = 0;
while (!$done) {
print qq( Title: );
if ($title) {
print qq([$title] );
}
my $t = <>;
chomp $t;
$t =~ tr/\x00-\x08\x0a-\x1f/ /ds;
$t =~ tr/\x09/ /s;
if ($t) {
$t =~ s/^\s+//;
$t =~ s/\s+$//;
$title = $t;
$done++;
} elsif ($title) {
$done++;
} else {
print color('bold white');
print STDERR qq(Invalid title '$t'\n);
print color('reset');
}
}
return($title);
}
sub read_description {
my ($description) = @_;
my $done = 0;
while (!$done) {
print qq( Description: );
if ($description) {
print qq([$description] );
}
my $d = <>;
chomp $d;
$d = Encode::encode( 'UTF-8', $d);
$d =~ tr/\x00-\x08\x0a-\x1f/ /ds;
$d =~ tr/\x09/ /s;
if ($d) {
$d =~ s/^\s+//;
$d =~ s/\s+$//;
$description = $d;
$done++;
} elsif ($description) {
$done++;
} else {
print color('bold white');
print STDERR qq(Invalid description '$d'\n);
print color('reset');
}
}
return($description);
}
sub read_slug {
my ($slug) = @_;
chomp($slug);
$slug =~ s/^\s+//;
my $done = 0;
while (!$done) {
print qq( Slug: );
if ($slug) {
print qq([$slug] );
}
my $s = <>;
chomp $s;
$s =~ s/^\s+//;
$s =~ tr/\x00-\x08\x0a-\x1f/ /ds;
$s =~ tr/\x09/ /s;
if ($s) {
$s =~ s/^\s+//;
$s =~ s/\s+$//;
$s =~ s/\s+/_/g;
$s =~ s|/+|_|g;
$slug =~ s/[[:punct:]]+/_/g; # harmonize with gemini
$slug =~ s/_+$//;
while ($s =~ s/__+/_/g) { 1 }
$s =~ s/[^\w\+\-\:\[\]\{\}\?\!\@\#\&\*\$\%]+//g;
$slug = $s;
$done++;
} elsif ($slug) {
$done++;
} else {
print color('bold white');
print STDERR qq(Invalid slug '$slug'\n);
print color('reset');
}
}
return($slug);
}
sub edit_body {
my ($preload, $tmpfile, $img) = @_;
# use a temp file to get the XHTML over to the next script
my $validator = File::Temp->new( TEMPLATE => 'temp.XXXXX',
DIR => '/tmp',
SUFFIX => '.body1.tmp',
UNLINK => 1 );
my $vfile = $validator->filename;
-f $vfile && unlink($vfile); # clear the way for nano
open(my $tf, ">", $tmpfile)
or die("Could not open '$tmpfile' for writing\n");
if ($preload) {
print $tf $preload;
}
print $tf $img;
close($tf);
my @cmd = ();
my $done = 0;
my $body = '';
my $startline = 1;
if ($preload) {
$startline = $preload =~ tr/\n//;
$startline = $startline + 2;
}
while (!$done) {
# edit body as tmpfile
# the +-1 positions the cursor at the bottom intitially
@cmd = ('/usr/bin/nano', "+$startline", '--tabstospaces', $tmpfile);
system(@cmd) == 0
or die("editing '@cmd' failed: $?\n");
# don't allow empty body
if (!-e $tmpfile || -z $tmpfile) {
next;
}
# make a copy by reading on file and writing it to another name
open(my $tf, "<", $tmpfile)
or die("Could not open '$tmpfile' for reading\n");
my $lines = "";
while (my $line = <$tf>) {
$line =~ s| \& | \& |gm;
$lines .= $line;
}
close ($tf);
# add paragraphs if there is no other XHTML markup
if ($lines !~ m/^<[^>]+>/ms) {
$lines =~ s|^|<p>\n|;
$lines =~ s|\n\n+|\n</p>\n<p>\n|gms;
$lines =~ s|$|\n</p>|s;
$lines =~ s|\s+$||ms;
}
open(my $ov, ">", $vfile)
or die("Could not copy to '$vfile'\n");
print $ov $lines;
close ($ov);
# force conversion of the second file to XHTML using tidy
@cmd = ('/usr/bin/tidy', '-m', '-q', '--show-info', 'no',
'--output-xml',
'--preserve-entities', 'yes', '-utf8', '-asxml', $vfile);
# validate the second file now that it has become XHTML
my ($stdout, $stderr, $result) = capture { system(@cmd) };
@cmd = ('/usr/bin/tidy', '-q', '--show-info', 'no',
'--output-xml',
'--preserve-entities', 'yes', '-utf8', '-xml', $vfile);
($stdout, $result) = capture_stdout {system(@cmd)};
if ($result) {
print color('bold white');
print STDERR "HTML validation failed\n";
print STDERR "\n$stderr\n";
print STDERR "press RETURN to continue editing";
print color('reset');
my $i = <>;
$done = 0;
next;
}
# begin checking the HTML
my $xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->implicit_tags(1);
$xhtml->parse_file($vfile)
or die("Could not parse '$vfile' : $!\n");
# look for hotlinked images, report error if they are found
my $error = 0;
for my $hotlink ($xhtml->findnodes('//img[starts-with(@src,"http")]')) {
if ($hotlink->attr('src') =~ m|^https?:/*[^/]*techrights.org/|) {
$xhtml->delete;
next;
}
$error++;
}
if ($error) {
print color('bold white');
print STDERR "Failure: image hotlinking present.";
print STDERR " Remove it to proceed.\n";
print STDERR "press RETURN";
print color('reset');
my $i = <>;
$done = 0;
$xhtml->delete;
next;
} else {
$done++;
}
# make sure images have alt text, report error if not
$error = 0;
for my $alt ($xhtml->findnodes('//img[not(@alt)
or @alt[not(string())]]')) {
$error++;
}
if ($error) {
print STDERR color('bold white');
print STDERR "Failure: missing or empty ALT attribute in IMG.";
print STDERR " Add it to proceed.\n";
print STDERR "press RETURN";
print STDERR color('reset');
my $i = <>;
$done = 0;
$xhtml->delete;
next;
} else {
$done++;
}
# find iframes
for my $iframe ($xhtml->findnodes('//iframe')) {
print STDERR color('bold white');
print STDERR "Warning: iframe found. Delete (D), ";
print STDERR "or re-edit (R)? Enter D or R: ";
print STDERR color('reset');
my $i = <>;
chomp($i);
if ($i eq 'D' or $i eq 'd') {
$done++;
} else {
$error++;
}
}
# try again, if there were errors
if ($error) {
$done = 0;
$xhtml->delete;
next;
}
# find absolute links to Techrights domain
# complain and fix but don't start over
for my $href ($xhtml->findnodes('//a[@href]')) {
if($href->attr('href') =~ m|^https?:/*[^/]*techrights.org/|) {
$error++;
}
}
if ($error) {
print STDERR color('bold white');
print STDERR "Warning: absolute link to the Techrights ";
print STDERR "domain. ";
print STDERR color('reset');
}
$xhtml->delete;
# process the body
$xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->implicit_tags(1);
$xhtml->no_expand_entities(1);
open (my $xhtmlfile, "<", $vfile)
or die("Could not open '$vfile' for reading: $!\n");
$xhtml->parse_file($xhtmlfile)
or die("Could not parse '$vfile' : $!\n");
close($xhtmlfile);
# blockquotes
for my $blockquote ($xhtml->findnodes('//blockquote')) {
my $new_blockquote = HTML::Element->new('blockquote');
# check all nodes within blockquotes, convert text to paragraphs
for my $node ($blockquote->content_list) {
# iterate through nodes under the blockquote
if (! ref($node)) {
# it is not an HTML element
# skip it is a blank line
if ($node =~ m/^\s+$/ms ) {
next;
}
my $p = HTML::Element->new('p');
$p->push_content($node);
$node = $p;
}
$new_blockquote->push_content($node);
}
$blockquote->replace_with($new_blockquote);
}
# find and replace absolute links to Techrights domain
my $absolute = 0;
for my $href ($xhtml->findnodes('//a[@href]')) {
# first for links
if($href->attr('href') =~ m|^https?:/*[^/]*techrights.org/|) {
my $h = $href->attr('href');
$h =~ s|^https?:/*[^/]*techrights.org/|/|;
$href->attr('href', $h);
$absolute++;
}
}
for my $img ($xhtml->findnodes('//img[@src]')) {
# then for images
if($img->attr('src') =~ m|^https?:/*[^/]*techrights.org/|) {
my $s = $img->attr('src');
$s =~ s|^https?:/*[^/]*techrights.org/|/|;
$img->attr('src', $s);
$absolute++;
}
}
if ($absolute) {
print STDERR $absolute;
print STDERR qq( reference), $absolute == 1 ? '' : 's';
print STDERR qq( converted to relative\n);
}
# delete iframes
for my $iframe ($xhtml->findnodes('//iframe')) {
$iframe->delete();
}
# extract the body from the tidy'd file
for my $bd ($xhtml->findnodes('//body')) {
for my $b ( $bd->detach_content ) {
eval {
$body = $body . $b->as_HTML('', ' ', {}) . "\n";
};
if ($@) {
# something went wrong
print STDERR qq(\n),$@,qq(\n);
print STDERR qq(Failed HTML. Press RETURN.\n);
$done=0;
my $i =<>;
last;
}
}
}
$body =~ s/\n+$//m;
$xhtml->delete;
}
close($editor);
close($validator);
# turn 'hair space' into a normal spaces
$body =~ s/\x{200a}/ /gm;
return($body);
}
sub get_next_available_recno {
# caclulate next record number based on what's already in the database
my ($dbh, $date, $slug, $draft) = @_;
my $recno;
# normalize the date format
$date =~ s/T.*//;
$date =~ s/-//g;
# prepare the slug search query, either for drafts or normal posts
my $sth;
if ($draft) {
$sth = $dbh->prepare('SELECT * FROM draft_keys WHERE date=? AND slug=?
ORDER BY ballast DESC LIMIT 1');
} else {
$sth = $dbh->prepare('SELECT * FROM keys WHERE date=? AND slug=?
ORDER BY ballast DESC LIMIT 1');
}
# execute search and increment ballast if the slug is already in use
$sth->execute($date,$slug);
my $ballast = 0;
if (my $row = $sth->fetchrow_hashref) {
$ballast = $row->{'ballast'} + 1;
$sth->finish;
}
# prepare to get the next record number
if ($draft) {
$sth = $dbh->prepare('SELECT max(recno) FROM draft_keys');
} else {
$sth = $dbh->prepare('SELECT max(recno) FROM keys');
}
# execute search and increment record number, or start at 1 if none
$sth->execute();
my $row = $sth->fetch;
$recno = $row->[0] ? $row->[0]+1 : 1;
$sth->finish;
return($recno, $ballast);
}
sub write_draft_keys {
# save draft information to draft_keys table
my ($dbh, $recno, $date, $slug, $ballast) = @_;
# normalize date format
$date =~ s/T.*//;
$date =~ s/-//g;
# prepare save query
my $sth = $dbh->prepare('INSERT INTO
draft_keys (recno, date, slug, ballast, written)
VALUES (?, ?, ?, ?, ?)');
# try to save draft information
eval {
$sth->execute($recno, $date, $slug, $ballast, 0);
};
if($@) {
# saving failed, it should never come to this, however
$sth->finish;
$dbh->rollback;
print color('bold white');
print STDERR "slug not unique for that date\n";
print STDERR "try again with another slug or perhaps another title\n";
print color('reset');
return(0);
}
$sth->finish;
return($recno);
}
sub write_draft_metadata {
# save draft metadata
my ($dbh, $recno, $title, $author, $date, $description) = @_;
# this check is probably redundant now
$date = iso_8601_date($date);
die unless $date;
# start with title
my ($term, $value) = ('dc.title', $title);
my $sth = $dbh->prepare('INSERT INTO
draft_metadata (recno, term, value)
VALUES(?, ?, ?)');
eval {
$sth->execute($recno, $term, $value);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not insert dc.title: $!\n");
}
# date created
($term, $value) = ('dc.date.created', $date);
eval {
$sth->execute($recno, $term, $value);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not insert dc.date.created: $!\n");
}
# date modified
($term, $value) = ('dc.date.modified', $date);
eval {
$sth->execute($recno, $term, $value);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not insert dc.date.created: $!\n");
}
# author
($term, $value) = ('dc.creator', $author);
eval {
$sth->execute($recno, $term, $value);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not insert dc.creator: $!\n");
}
# description
($term, $value) = ('dc.description', $description);
eval {
$sth->execute($recno, $term, $value);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not insert dc.description: $!\n");
}
# done
$sth->finish;
return(1);
}
sub write_draft_body {
# save draft body
my ($dbh, $draft_recno, $post) = @_;
my $sth;
# prepare to save body
my $query = qq(INSERT INTO draft_body (recno, body) VALUES(?, ?));
$sth = $dbh->prepare($query);
# try to save body
eval {
$sth->execute($draft_recno, $post);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not make draft body for $draft_recno: $!\n")
}
$sth->finish;
# strip all markup from the post body
my $rawtext = get_raw_text($post, '');
# prepare to save raw text
$query = qq(INSERT INTO draft_rawtext (recno,fulltext) VALUES(?,?));
$sth = $dbh->prepare($query);
# try to save raw text
eval {
$sth->execute($draft_recno,$rawtext)
or warn("\n");
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not execute rawtext entry query: $! : $query\n");
}
$sth->finish;
return(1);
}
sub write_nondraft {
# save regular record
my ($dbh, $draft_recno, $recno, $ballast, $body,
$title, $description) = @_;
# prepare to save key data using data fetched from drafts table
my $query = qq(INSERT INTO keys (recno, written, date, ballast, slug)
SELECT ?, written, date, ?, slug
FROM draft_keys
WHERE draft_keys.recno=?);
my $sth = $dbh->prepare($query);
# try to save key data
eval {
$sth->execute($recno,$ballast,$draft_recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not prepare key entry: $!\n");
}
$sth->finish;
# prepare to save metadata using data fetched from drafts table
$query = qq(INSERT INTO metadata (recno, term, value)
SELECT ?, term, value
FROM draft_metadata
WHERE draft_metadata.recno=?);
$sth = $dbh->prepare($query);
# try to save metadata
eval {
$sth->execute($recno,$draft_recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not prepare metadata entry: $!\n");
}
$sth->finish;
# prepare to save body using data fetched from drafts table
$query = qq(INSERT INTO body (recno, body)
SELECT ?, body
FROM draft_body
WHERE draft_body.recno=?);
$sth = $dbh->prepare($query);
# try to save body
eval {
$sth->execute($recno,$draft_recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not prepare body entry: $!\n");
}
$sth->finish;
# strip markup from body and title, leaving only text
my $rawtext = get_raw_text($body, $title . ' ' . $description);
# prepare to save raw text
$query = qq(INSERT INTO rawtext_body (recno,fulltext) VALUES(?,?));
$sth = $dbh->prepare($query);
# try to save raw text
eval {
$sth->execute($recno,$rawtext);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not prepare rawtext entry ($recno): $!\n");
}
$sth->finish;
# simplified raw text metadata
$rawtext = $title . ' ' . $description;
# prepare to save raw text of metadata
$query = qq(INSERT INTO rawtext_metadata (recno,fulltext) VALUES(?,?));
$sth = $dbh->prepare($query);
# try to save raw text metadata
eval {
$sth->execute($recno,$rawtext);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not prepare rawtext entry ($recno): $!\n");
}
$sth->finish;
# work-around until PRAGMA foreign_keys=ON works with DBI
my @queries = (
# build up a list of queries to delete defunct draft data
qq(DELETE FROM draft_keys WHERE recno=?),
qq(DELETE FROM draft_metadata WHERE recno=?),
qq(DELETE FROM draft_body WHERE recno=?),
qq(DELETE FROM draft_rawtext WHERE recno=?),
);
for my $query (@queries) {
# loop through the list of deletion queries
$sth = $dbh->prepare($query);
eval {
$sth->execute($draft_recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
warn("Could not remove old draft material ($recno): $!\n");
}
$sth->finish;
}
return(1);
}
sub get_raw_text {
# strip markup from text
my ($body, $metadata) = @_;
$body = decode_entities($body);
# pad out all elements so that there is still space when they are collapsed
$body =~ s/>/> /gms;
my $xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->implicit_tags(1);
$xhtml->parse($body)
or die("Could not parse rawtext : $!\n");
my $rawtext = decode_entities($metadata) . ' '
. join(' ', map($_->as_trimmed_text, $xhtml->findnodes('//body')));
return($rawtext);
}
sub done {
# called when aborting changes to the database
my ($dbh) = @_;
# undo all the changes
$dbh->rollback;
$dbh->disconnect;
print STDERR "quitting $!\n";
exit (0);
}
sub iso_8601_date {
# standardize date string
my ($date) = @_;
if ($date =~ m/^([0-9]{4})-([0-9]{2})-([0-9]{2})
T([0-9]{2}):([0-9]{2}):([0-9]{2})/x) {
1;
} elsif ($date =~ s/^([0-9]{4})-([0-9]{2})-([0-9]{2})$/$1-$2-$3T00:00/) {
1;
} elsif ($date =~ s/^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}:[0-9]{2})$/$1-$2-$3T$4/) {
1;
} elsif ($date =~ m/^[0-9]{4}[0-9]{2}[0-9]{2}T[0-9]{2}:[0-9]{2}$/) {
1;
} else {
$date = 0;
}
return($date);
}
sub check_title {
# check if a title has been used already recently
my ($dbh, $title) = @_;
# find date when (if) that title was most recently used
my $sth = $dbh->prepare('SELECT t2.value FROM metadata as t1
INNER JOIN metadata AS t2
ON t1.recno=t2.recno AND t1.term="dc.title"
AND t1.value=? AND t2.term="dc.date.created"
ORDER BY t2.value DESC LIMIT 1;');
# search
eval {
$sth->execute($title);
};
if($@) {
$sth->finish;
$dbh->rollback;
exit(1); # error
}
# check search results
if (my $row = $sth->fetchrow_hashref) {
my $d1 = $row->{value};
if ( my ($y1, $m1, $d1, $H1, $M1) =
($d1 =~ m/^(\d{4})-(\d{2})-(\d{2})T/) ) {
my ($Dd) = Delta_Days( $y1, $m1, $d1, Today(1) );
# complain if too fresh
if ($Dd < 7) {
my $d = $Dd + 1;
print STDERR color('bold white');
print STDERR qq(\t Warning: that title was used less than $d );
print STDERR $d==1 ? 'day' : 'days';
print STDERR qq( ago );
print STDERR color('reset'), " ";
print STDERR "\n"
}
}
}
$sth->finish;
return(1);
}
Generator/tr-static-site-generator.sqlite3.schema
CREATE TABLE IF NOT EXISTS "keys"(
recno integer not null primary key,
written integer default 0 not null,
date varchar(8) not null,
ballast integer,
slug varchar(256) not null,
unique(date, slug, ballast)
);
CREATE TABLE metadata(
recno integer,
term varchar(25) not null,
value varchar(256) not null,
constraint fk_recno foreign key(recno)
references "keys"(recno) on delete cascade
);
CREATE TABLE IF NOT EXISTS "body"(
recno integer primary key,
body text not null,
foreign key(recno)
references "keys"(recno) on delete cascade
);
CREATE TABLE IF NOT EXISTS "rawtext_body"(
recno integer primary key unique,
fulltext text not null,
foreign key(recno)
references "keys"(recno) on delete cascade
);
CREATE TABLE IF NOT EXISTS "draft_keys"(
recno integer not null primary key,
written integer default 0 not null,
date varchar(8) not null,
ballast integer,
slug varchar(256) not null,
unique(date, slug, ballast)
);
CREATE TABLE IF NOT EXISTS "draft_metadata"(
recno integer,
term varchar(25) not null,
value varchar(256) not null,
constraint fk_recno foreign key(recno)
references "draft_keys"(recno)
on delete cascade
);
CREATE TABLE IF NOT EXISTS "draft_body"(
recno integer primary key,
body text not null,
foreign key(recno)
references "draft_keys"(recno)
on delete cascade
);
CREATE TABLE draft_rawtext(
recno integer primary key unique,
fulltext text not null,
foreign key(recno) references "keys"(recno) on delete cascade
);
CREATE TABLE IF NOT EXISTS "rawtext_metadata"(
recno integer primary key unique,
fulltext text not null,
foreign key(recno)
references "keys"(recno) on delete cascade
);
CREATE VIRTUAL TABLE "fts5_body" USING FTS5(
fulltext,
content=rawtext_body,
content_rowid=recno
)
/* fts5_body(
fulltext
) */;
CREATE TABLE IF NOT EXISTS 'fts5_body_data'(id INTEGER PRIMARY KEY, block BLOB);
CREATE TABLE IF NOT EXISTS 'fts5_body_idx'(
segid,
term,
pgno,
PRIMARY KEY(segid, term)
) WITHOUT ROWID;
CREATE TABLE IF NOT EXISTS 'fts5_body_docsize'(id INTEGER PRIMARY KEY, sz BLOB);
CREATE TABLE IF NOT EXISTS 'fts5_body_config'(k PRIMARY KEY, v) WITHOUT ROWID;
CREATE VIRTUAL TABLE "fts5_metadata" USING FTS5(
fulltext,
content=rawtext_metadata,
content_rowid=recno
)
/* fts5_metadata(
fulltext
) */;
CREATE TABLE IF NOT EXISTS 'fts5_metadata_data'(id INTEGER PRIMARY KEY, block BLOB);
CREATE TABLE IF NOT EXISTS 'fts5_metadata_idx'(
segid,
term,
pgno,
PRIMARY KEY(segid, term)
) WITHOUT ROWID;
CREATE TABLE IF NOT EXISTS 'fts5_metadata_docsize'(id INTEGER PRIMARY KEY, sz BLOB);
CREATE TABLE IF NOT EXISTS 'fts5_metadata_config'(k PRIMARY KEY, v) WITHOUT ROWID;
CREATE TABLE IF NOT EXISTS "old_keys"(
recno integer not null primary key,
file varchar(256) not null
);
CREATE TABLE IF NOT EXISTS "old_metadata"(
recno integer,
term varchar(25) not null,
value varchar(256) not null
);
CREATE TABLE IF NOT EXISTS "old_rawtext_body"(
recno integer primary key unique,
fulltext text not null
);
CREATE TABLE IF NOT EXISTS "old_rawtext_comments"(
recno integer primary key unique,
fulltext text not null
);
CREATE TABLE IF NOT EXISTS "old_rawtext_metadata"(
recno integer primary key unique,
fulltext text not null
);
CREATE VIRTUAL TABLE "old_fts5_body" USING FTS5(
fulltext,
content=old_rawtext_body,
content_rowid=recno
)
/* old_fts5_body(
fulltext
) */;
CREATE TABLE IF NOT EXISTS 'old_fts5_body_data'(id INTEGER PRIMARY KEY, block BLOB);
CREATE TABLE IF NOT EXISTS 'old_fts5_body_idx'(
segid,
term,
pgno,
PRIMARY KEY(segid, term)
) WITHOUT ROWID;
CREATE TABLE IF NOT EXISTS 'old_fts5_body_docsize'(id INTEGER PRIMARY KEY, sz BLOB);
CREATE TABLE IF NOT EXISTS 'old_fts5_body_config'(k PRIMARY KEY, v) WITHOUT ROWID;
CREATE VIRTUAL TABLE "old_fts5_comments" USING FTS5(
fulltext,
content=old_rawtext_comments,
content_rowid=recno
)
/* old_fts5_comments(
fulltext
) */;
CREATE TABLE IF NOT EXISTS 'old_fts5_comments_data'(id INTEGER PRIMARY KEY, block BLOB);
CREATE TABLE IF NOT EXISTS 'old_fts5_comments_idx'(
segid,
term,
pgno,
PRIMARY KEY(segid, term)
) WITHOUT ROWID;
CREATE TABLE IF NOT EXISTS 'old_fts5_comments_docsize'(id INTEGER PRIMARY KEY, sz BLOB);
CREATE TABLE IF NOT EXISTS 'old_fts5_comments_config'(k PRIMARY KEY, v) WITHOUT ROWID;
CREATE VIRTUAL TABLE "old_fts5_metadata" USING FTS5(
fulltext,
content=old_rawtext_metadata,
content_rowid=recno
)
/* old_fts5_metadata(
fulltext
) */;
CREATE TABLE IF NOT EXISTS 'old_fts5_metadata_data'(id INTEGER PRIMARY KEY, block BLOB);
CREATE TABLE IF NOT EXISTS 'old_fts5_metadata_idx'(
segid,
term,
pgno,
PRIMARY KEY(segid, term)
) WITHOUT ROWID;
CREATE TABLE IF NOT EXISTS 'old_fts5_metadata_docsize'(id INTEGER PRIMARY KEY, sz BLOB);
CREATE TABLE IF NOT EXISTS 'old_fts5_metadata_config'(k PRIMARY KEY, v) WITHOUT ROWID;
CREATE TRIGGER rawtext_insert_body
AFTER INSERT ON rawtext_body BEGIN
INSERT INTO fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER rawtext_update_body
AFTER UPDATE ON rawtext_body BEGIN
INSERT INTO fts5_body(fts5_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER rawtext_delete_body
AFTER DELETE ON rawtext_body BEGIN
INSERT INTO fts5_body(fts5_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
CREATE TRIGGER rawtext_insert_metadata
AFTER INSERT ON rawtext_metadata BEGIN
INSERT INTO fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER old_rawtext_insert_b
AFTER INSERT ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER old_rawtext_update_b
AFTER UPDATE ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(old_fts5_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER old_rawtext_delete_b
AFTER DELETE ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(old_fts_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
CREATE TRIGGER old_rawtext_insert_c
AFTER INSERT ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER old_rawtext_update_c
AFTER UPDATE ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(old_fts5_comments,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_comments(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER old_rawtext_delete_c
AFTER DELETE ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(old_fts5_comments,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
CREATE TRIGGER old_rawtext_insert_m
AFTER INSERT ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER old_rawtext_update_m
AFTER UPDATE ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(old_fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER old_rawtext_delete_m
AFTER DELETE ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(old_fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
CREATE TRIGGER rawtext_update_metadata
AFTER UPDATE ON rawtext_metadata BEGIN
INSERT INTO fts5_metadata(fts5_metadata, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;
CREATE TRIGGER rawtext_delete_metadata
AFTER DELETE ON rawtext_metadata BEGIN
INSERT INTO fts5_metadata(fts5_metadata, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;
Generator/tr-scale-and-process-image.pl
#!/usr/bin/perl -T
use utf8;
use Getopt::Long;
use URI::Escape;
use URI;
use File::Temp qw(tempfile);
use Digest::SHA qw(sha256);
use File::Copy qw(copy);
use File::Basename qw/fileparse basename/;
use Image::Magick;
use Capture::Tiny qw(capture_stdout);
use Date::Calc qw/Today/;
use File::Path qw(make_path);
use Cwd qw(abs_path);
use DBI qw(:sql_types);
use Config::Tiny;
use English;
use strict;
use warnings;
our $VERBOSE = 0;
my $config = '';
my $dpath = &dpath('/i');
my $help = 0;
my $db = 0;
my $delete = 0;
if ($ENV{'USER'} eq 'root' && ! $ENV{'EUID'}) {
print STDERR qq(Cannot run as root!\nAborting\n);
exit(1);
}
GetOptions ("config|c=s" => \$config,
"database|d" => \$db,
"delete" => \$delete,
"verbose+" => \$VERBOSE,
"help|h" => \$help,
);
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$|);
if (! $config) {
warn("Provide configuration file via the -c option.\n");
my $err = 1;
usage($script, 'sample.conf', $err);
}
if ($help) {
my $err = 0;
usage($script, $config, $err);
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configuration file '$config': $!\n");
my $dbname = $configuration->{database}->{images}
or die("Database name missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
my $dbfile = $serverroot . '/db/' . $dbname;
if ($VERBOSE > 1) {
print qq(dbfile = $dbfile\n);
}
my $documentroot = $configuration->{webserver}->{documentroot}
or die("DocumentRoot missing from configuration file\n");
# untaint the $PATH
local $ENV{'PATH'} = '/usr/local/bin:/usr/bin:/bin';
# make sure the database file is there, but don't check schema
if ($db && ! -e $dbfile) {
&prepare_database($dbfile);
} elsif (! -e $dbfile) {
print "\nMissing database file \"$dbfile\"\n";
print "Try using the --database option to create it.\n\n";
my $err = 1;
&usage($script, $config, $err);
exit(1);
} elsif ($db) {
print "Database file \"$dbfile\" already exists\n";
print "Ignoring the --database option\n";
}
if ($help) {
my $err = 0;
&usage($script, 'sample.conf', $err);
exit(0);
}
if ($#ARGV > 0) {
print "Too many command line arguments. Maybe quotes are missing?\n";
my $err = 1;
&usage($script, $config, $err);
exit(1);
}
# a URL is obligatory
my $input = shift || 0;
if (! $input) {
my $err = 1;
&usage($script, $config, $err);
exit(1);
}
my ($checksum) = ($input =~ m/^([a-fA-F0-9]{64})$/);
if ($checksum && $delete) {
&delete_from_db_and_file_system(0, $checksum);
exit(1);
}
# untaint the URL argument
my ($canonical,$dfile,$dext) = &cleaned_url($input, $serverroot);
# save the fetched image in a ephemeral file name
my $tmp = File::Temp->new( TEMPLATE => 'temp.XXXXX',
DIR => '/tmp',
SUFFIX => '.fetch.techrights.img.tmp',
UNLINK => 1 );
my $tmpfile = '';
if ($canonical =~ m|https?:|) {
$tmpfile = &fetch_image($canonical, $tmp);
} elsif ($canonical =~ m|^file:|) {
$tmpfile = &fetch_local_image($canonical, $tmp);
}
if (!$dext) {
($dext) = &verify_format($tmp);
}
my ($file, $dup);
my $type;
my $image = 0;
$documentroot =~ s|(?=[^/])$|/|;
if ($delete) {
&delete_from_db_and_file_system($tmpfile, 0);
exit(1);
}
if (&isimage($tmpfile)) {
if ($VERBOSE) {
print qq(This is an IMAGE\n);
}
$type = 'image';
($file, $dup)= &deduplicate($dbfile, $tmpfile, $documentroot,
$dpath, $dfile, $dext, $type);
} elsif (&isvideo($tmpfile)) {
if ($VERBOSE) {
print qq(This is a VIDEO\n);
}
$dpath = &dpath('/v');
$type = 'video';
($file, $dup)= &deduplicate($dbfile, $tmpfile, $documentroot,
$dpath, $dfile, $dext, $type);
} else {
print qq(Unkown type\n);
exit(1);
}
unlink($tmpfile)
or die("Could not remove '$tmpfile' from upload directory\n");
# retrieve an existing thumbnail from the db or make a new one
my ($thumbnail, $width, $height) = (0) x 3;
if (!$dup) {
# the main file is new, make a new thumbnail for it
if ($type eq 'image') {
($thumbnail, $width, $height) =
&make_image_thumbnail($dbfile, $documentroot, $file);
} elsif ($type eq 'video') {
($thumbnail, $width, $height) =
&make_video_thumbnail($dbfile, $documentroot, $file);
}
# print the matching XHTML markup
my $full = $file;
if ($thumbnail) {
my $thumb = $thumbnail;
$full =~ s/%/%25/g;
$thumb =~ s/%/%25/g;
my $link = qq(\n<p class="center">)
. qq(<img src="$full" alt="" /></p>);
print qq($link\n\n);
$link = qq(\n<p class="side"><a href="$full">)
. qq(<img src="$thumb" width="$width" height="$height")
. qq( alt="" /></a></p>);
print qq($link\n);
} else {
$full =~ s/%/%25/g;
my $link = qq(\n<p class="center">)
. qq(<img src="$full" alt="" /></a></p>);
print qq($link\n\n);
$link = qq(\n<p class="side"><a href="$full">)
. qq(<img src="$full" width="$width" height="$height")
. qq( alt="" /></a></p>);
print qq($link\n);
}
} else {
# the main file already exists
my ($width, $height) = (0, 0);
my ($f, $d, $s) = fileparse($file, qr/\.[^.*]*$/);
# videos have png thumbnails, should this be in the image table?
if ($s eq '.webm'
or $s eq '.ogv'
or $s eq '.ogm'
or $s eq '.ogg'
or $s eq '.mp4'
) {
$s = '.png';
}
my $thumb = qq($d$f.thumbnail$s);
my $full = $file;
my $img;
if (-f $documentroot.$thumb) {
if ($VERBOSE) {
print "DUP with thumbnail $thumb $type\n";
}
my $image = Image::Magick->new;
open(my $inputimage, '<', $documentroot.$thumb);
my $err = $image->Read(file=>\$inputimage);
if ($err) {
print "Error: $err\n";
exit(1);
}
close($inputimage);
# read width and height from the existing thumbnail file,
($width,$height) = $image->Get('width','height');
# print the matching XHTML markup
$full =~ s/%/%25/g;
$thumb =~ s/%/%25/g;
my $link = qq(\n<p class="center">)
. qq(<img src="$full" alt="" /></p>);
print qq($link\n);
$link = qq(\n<p class="side"><a href="$full">)
. qq(<img src="$thumb" width="$width" height="$height")
. qq( alt="" /></a></p>);
print qq($link\n);
} else {
if ($VERBOSE) {
print "DUP but lacking thumbnail $type\n";
}
# create a thumbnail, or else remove all traces of failure
if ($type eq 'image') {
($thumbnail, $width, $height) =
&make_image_thumbnail($dbfile, $documentroot, $file);
} elsif ($type eq 'video') {
($thumbnail, $width, $height) =
&make_video_thumbnail($dbfile, $documentroot, $file);
}
if ($thumbnail) {
# print the matching XHTML markup
$full =~ s/%/%25/g;
$thumbnail =~ s/%/%25/g;
my $link = qq(\n<p class="cener">)
. qq(<img src="$full" /></p>);
print qq($link\n\n);
$link = qq(\n<p class="side"><a href="$full">)
. qq(<img src="$thumbnail" width="$width")
. qq( height="$height" alt="" /></a></p>);
print qq($link\n);
}
}
}
exit(0);
sub usage {
my ($script, $config, $error) = @_;
$script = basename($script);
print <<"EOH";
Usage:
$script [option] url
Run this script with the URL to an image file as the first
argument and it will create a thumbnail in the destination
directory, move the original there too, and then display the
relevant HTML markup to the image and it's thumbnail.
If the image is less than 250 pixels on its largest axis, then
no thumbnail will be generated and only the original will be used.
The aspect ratio will be preserved. Thumbnails for images in
landscape mode will have a maximum width of 250 and those in
portrait mode will have a maximum height of 250.
-d, --database initialize database if missing
--delete remove the file identified by the designate URL or checksum
-v increase debugging verbosity
-h this help text
EOH
if ($config eq 'sample.conf') {
print "Provide a configuration file, ";
} else {
print "Looking for config file in '$config',\n";
}
print <<EOC;
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
sub dpath {
my ( $dpath ) = @_;
# append year and month to target path
my $gmt = 1;
my ($year,$month,$day) = Today($gmt);
$year = sprintf("%04d", $year);
$month = sprintf("%02d", $month);
$dpath = $dpath.'/'.$year.'/'.$month;
return($dpath);
}
sub cleaned_url {
my ($input, $serverroot) = @_;
my $uri = URI->new($input);
my ($canonical, $scheme, $host, $port, $path, $file) = (0) x 6;
$scheme = $uri->scheme || 0;
if ($scheme eq 'https' || $scheme eq 'http') {
$host = $uri->host || 0;
if (defined( $uri->path)) {
$path = $uri->path;
}
$port = $uri->port;
if ($path =~ m|\;.*$|
|| $path =~ m|[\000-\037]|) {
die("Bad URL path\n");
}
($file) = ($path =~ m#([^/\;]*)(\;|$)#);
$canonical = "$scheme://$host:$port$path";
if ($VERBOSE > 1) {
print qq(URI= $uri\n);
print qq( $scheme\n $host \t$port \t$path\n);
print qq( $canonical\n);
print qq( File: $file\n);
}
} elsif ($scheme eq 'file') {
my $uploads = File::Spec->canonpath($serverroot . "/uploads");
my $p = $input;
$p =~ s|^file:||;
$p =~ s|^//+|/|;
my ($f, $path, undef) = fileparse($p);
if (!$path ) {
die("Bad path '$input'\n");
}
if ($path !~ m/^$uploads/) {
die("Path outside uploads dir '$input'\n");
}
if (!$f) {
die("Bad or missing filename: $file\n");
}
$canonical = "file://$path/$f";
$file = $f;
} else {
warn("Unconfigured protocol: $scheme\n");
exit(1);
}
my ($dfile, $dext) = (0) x 2;
($dfile, $dext) = ($file =~ m/([^\.]*)\.?([^\.]*)$/);
$dext = lc($dext);
if ($VERBOSE > 1) {
print qq( F: $file\n);
print qq( P: $dpath\n);
print qq( N: $dfile\t$dext\n);
}
return($canonical, $dfile, $dext);
}
sub fetch_image {
my ($canonical, $tmp) = @_;
# use a temp file while checking duplicate and such
my $tmpfile = $tmp->filename;
-f $tmpfile && unlink($tmpfile); # clear the way for wget
# wget does not acknowledge either self-signed or Let's Encrypt
my $noise = '--quiet';
if ($VERBOSE > 1) {
$noise = '--verbose';
}
my @cmd = ('wget', '--no-check-certificate', $noise,
'--user-agent', 'techrights.org',
'--output-document', $tmpfile, "$canonical");
system(@cmd) == 0
or die("system '@cmd' failed: $?\n");
return($tmpfile);
}
sub fetch_local_image {
my ($canonical, $tmp) = @_;
# extract and untaint file name
my $f = '';
if ($canonical =~ m/^([^\x3b]+)$/) {
$f = $1;
} else {
die("Wonky file name '$canonical'\n");
}
$f =~ s/^file://;
$f = abs_path($f);
my $file = '';
if ($f =~ m/^([^\x3b]+)$/) {
$file = $1;
} else {
die("Tainted\n");
}
# make sure the source file is really there first
if (! -e $file) {
die("The file '$file' does not exist.\n");
} elsif (! -f $file) {
die("The file '$file' exists but is not a regular file.\n");
}
# use a temp file while checking duplicate and such
my $tmpfile = $tmp->filename;
-f $tmpfile && unlink($tmpfile); # clear the way for wget
# use a temporary file instead
copy($file, $tmpfile)
or die("Could not relocate from '$file' to '$tmpfile'\n");
# clean up
unlink($file);
return($tmpfile);
}
sub verify_format {
my ($tmp) = @_;
my $dext = 'image';
open(my $inputimage, '<', $tmp);
my $image = Image::Magick->new;
$image->Read(file=>\$inputimage);
close($inputimage);
my ($id) = capture_stdout{ $image->Identify() };
my ($format) = ($id =~ m/Format:\s+(\w+)/);
$format = lc($format);
if ($VERBOSE > 1) {
print " O: ",$format,"\n";
}
if ($format eq 'jpeg'
or $format eq 'jpg'
or $format eq 'png'
or $format eq 'gif'
or $format eq 'avif'
or $format eq 'svg') {
return($format);
} else {
if ($VERBOSE) {
print qq(Unknown file: $dext\n);
}
return(0);
}
}
sub delete_from_db_and_file_system {
my ($tmpfile, $fingerprint) = @_;
if (-f $tmpfile) {
# calcuate the checksum
my $sha = Digest::SHA->new('sha256_hex');
$sha->addfile($tmpfile);
$fingerprint = $sha->hexdigest;
}
if ($VERBOSE) {
print qq( SHA256: $fingerprint\n);
}
# look up the checksum in the db
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my $query = qq(SELECT * FROM images WHERE sha256=?);
my $sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute($fingerprint)
or die("execute statement failed: $dbh->errstr()\n");
my $dup = 0;
# now check if the image is a duplicate
if (my $data = $sth->fetchrow_hashref) {
# it is a duplicate
my $imagefile = $documentroot.$data->{'image'};
$query = qq(DELETE FROM images WHERE sha256=?);
$sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute($fingerprint)
or die("execute statement failed: $dbh->errstr()\n");
if (-f $imagefile) {
my $thumbnail = $imagefile;
$thumbnail =~ s/\.([^\.]+)$/.thumbnail.$1/;
unlink($imagefile)
or die("Could not unlink '$imagefile' :$!\n");
unlink($thumbnail)
or die("Could not unlink '$thumbnail' :$!\n");
print qq(Deleted.\n);
}
$sth->finish;
$dbh->commit;
} else {
print qq(Not Found for deletion. No changes.\n);
$sth->finish;
$dbh->disconnect;
}
$sth->finish;
$dbh->disconnect;
exit(0);
}
sub deduplicate {
my ($dbfile, $tmpfile, $documentroot, $dpath, $dfile, $dext, $type) = @_;
# look for sha256 checksum in database table
# calcuate the checksum
my $sha = Digest::SHA->new('sha256_hex');
$sha->addfile($tmpfile);
my $fingerprint = $sha->hexdigest;
if ($VERBOSE) {
print qq( SHA256: $fingerprint\n);
}
if ($type eq 'image') {
if ($dext ne 'svg') {
# limit the number of iterations in an animated loop
&finiteloop($tmpfile);
}
}
# look up the checksum in the db
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my $query = qq(SELECT * FROM images WHERE sha256=?);
my $sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute($fingerprint)
or die("execute statement failed: $dbh->errstr()\n");
my $file = '';
my %data;
my $dup = 0;
# now check if the image is a duplicate
if (my $data = $sth->fetchrow_hashref) {
# it is a duplicate
$file = $data->{'image'};
$sth->finish;
$dup = 1;
} else {
# it is not a duplicate
if (! -e $documentroot.$dpath) {
make_path($documentroot.$dpath,{mode=>0775})
or die("Could not create path '$documentroot.$dpath' : $!\n");
print "Created directory '$documentroot.$dpath'\n" if ($VERBOSE);
} elsif (! -d $documentroot.$dpath) {
die("'$documentroot.$dpath' exists but is not a directory.\n");
} elsif (! -w $documentroot.$dpath) {
die("Directory '$documentroot.$dpath' is not writable.\n");
}
my $newfile = $dpath.'/'.$dfile.'.'.$dext;
my $absfile = $documentroot.$dpath.'/'.$dfile.'.'.$dext;
my $count = 1;
if (-e $absfile) {
while (-e $absfile) {
$absfile = "$documentroot$dpath/$dfile.$count.$dext";
$newfile = "$dpath/$dfile.$count.$dext";
$count++;
}
}
my $epoch = time();
$query = qq(INSERT INTO images (sha256, epoch, image)
VALUES (?,?,?));
$sth=$dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute($fingerprint, $epoch, $newfile)
or die("execute statement failed: $dbh->errstr()\n");
if ($VERBOSE > 1) {
print qq(Query = $query\n);
print qq(FEN= $fingerprint, $epoch, $newfile\n);
}
copy($tmpfile, $documentroot.$newfile)
or die("Could not relocate from '$tmpfile' to '$documentroot$newfile'\n");
# double check group write for the shared file
my $mode = 0664;
chmod($mode, $newfile);
$sth->finish;
$dbh->commit;
$file = $newfile;
}
$dbh->disconnect;
return($file, $dup);
}
sub finiteloop {
my ( $file ) = @_;
my $image = Image::Magick->new;
open(my $inputimage, '<', $file);
my $err = $image->Read(file=>\$inputimage);
close($inputimage);
my ($loop) = $image->Get('iterations') || 0;
if ($loop == 0) {
$image->Set('iterations' => 5);
$image->Write($file);
}
return($image);
}
sub make_image_thumbnail {
my ($dbfile,$documentroot, $original_image) = @_;
my ($destfile, $destpath, $destext) =
fileparse($original_image, qr/\.[^.*]*$/);
$destext =~ s/^\.//;
my $thumbnail = $destpath.$destfile.'.thumbnail.'.$destext;
my $image = Image::Magick->new;
open(my $inputimage, '<', $documentroot.$original_image);
my $err = $image->Read(file=>\$inputimage);
# || &clean_up($dbfile,$documentroot.$original_image);
close($inputimage);
if ($err) {
print "Error: $err\n";
exit(1);
}
my ($width,$height) = $image->Get('width','height');
my ($twidth, $theight);
if ($width > 250 || $height > 250) {
if ($width > $height) {
if ($width > 250) {
$theight = int($height * (250/$width));
$twidth = 250;
}
} else {
if ($height > 250) {
$twidth = int($width * (250/$height));
$theight = 250;
}
}
if ($destext ne 'svg') {
$image->Resize(width=>$twidth, height=>$theight);
$image->Write($documentroot.$thumbnail);
} else {
if (link($documentroot.$original_image,
$documentroot.$thumbnail)) {
if ($VERBOSE) {
print "Created hard link for thumbnail\n";
}
} else {
die("Could not hard link for thumbnail: \
'$documentroot.$original_image' -> '$documentroot.$thumbnail'\n");
}
}
# double-check the group write permissions for this shared file
my $mode = 0664;
chmod($mode, $documentroot.$thumbnail);
} else {
($twidth, $theight) = ($width, $height);
$thumbnail = 0;
}
return($thumbnail, $twidth, $theight);
}
sub make_video_thumbnail {
my ($dbfile,$documentroot, $original_image) = @_;
my ($destfile, $destpath, $destext) =
fileparse($original_image, qr/\.[^.*]*$/);
$destext =~ s/^\.//;
my $command = '/usr/bin/ffmpeg';
my @options = qw(-loglevel error
-filter_complex scale=250:-1
-frames:v 1
-q:v 2);
my $thumbnail = $destpath.$destfile.'.thumbnail.png';
my $ec = system($command, '-i', $documentroot.$original_image,
@options, $documentroot.$thumbnail);
if ($ec) {
print "Error $ec using ffmpeg for thumbnail\n";
}
my $image = Image::Magick->new;
open(my $inputimage, '<', $documentroot.'/'.$thumbnail);
my $err = $image->Read(file=>\$inputimage);
close($inputimage);
if ($err) {
print "Error: $err\n";
exit(1);
}
my ($twidth,$theight) = $image->Get('width','height');
# double-check the group write permissions for this shared file
my $mode = 0664;
chmod($mode, $documentroot.$thumbnail);
return($thumbnail, $twidth, $theight);
}
sub clean_up {
my ($dbfile,$absfilepath) = @_;
if (-f $absfilepath) {
my $sha = Digest::SHA->new('sha256_hex');
$sha->addfile($absfilepath);
my $fingerprint = $sha->hexdigest;
if (!$fingerprint) {
die("Could not fingerprint the original file: $absfilepath\n");
}
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my $query = qq(DELETE FROM images WHERE sha256=?);
my $sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute($fingerprint)
or die("execute statement failed: $dbh->errstr()\n");
$sth->finish;
$dbh->commit;
$dbh->disconnect;
unlink($absfilepath);
}
die("Could not process image. File and db entry removed.\n");
}
sub prepare_database {
my ($dbfile) = @_;
my ($dbpath, $dbext) = (0) x 2;
($dbfile, $dbpath, $dbext) =
fileparse($dbfile, qr/\.[^.*]*$/);
$dbext =~ s/^\.//;
if (! -e $dbpath) {
make_path($dbpath,{mode=>0775})
or die("Could not create path '$dbpath' : $!\n");
print "Created directory '$dbpath'\n" if ($VERBOSE);
} elsif (! -d $dbpath) {
die("'$dbpath' exists but is not a directory.\n");
} elsif (! -w $dbpath) {
die("Directory '$dbpath' is not writable.\n");
}
my $db = qq($dbpath/$dbfile.$dbext);
my $schema = qq(CREATE TABLE IF NOT EXISTS
images (sha256 varchar(64) unique not null,
epoch integer not null,
image varchar(256) not null));
my @cmd = ('echo', "'$schema'", '|', 'sqlite3', $db);
print join(' ', @cmd),"\n";
system(join(' ', @cmd)) == 0
or die("Could not create database '$db': $?\n");
$schema = qq(CREATE UNIQUE INDEX fingerprint on images (sha256));
@cmd = ('echo', "'$schema'", '|', 'sqlite3', $db);
system(join(' ', @cmd)) == 0
or die("Could not create index: $?\n");
print "database created\n";
return(1);
}
sub isimage {
my ($file) = @_;
if ($VERBOSE > 1) {
print qq(Running Image::Magick\n);
}
my $mystery = Image::Magick->new();
$mystery->Read($file);
if ( $mystery->Get('format')) {
return(1);
}
return(0);
}
sub isvideo {
my ($file) = @_;
my $command = q(/usr/bin/ffprobe);
my @options = qw(-v error -select_streams v:0 -show_entries
stream=codec_name -of default=nokey=1:noprint_wrappers=1);
if ($VERBOSE > 1) {
print qq(Running $command\n);
}
my ($format, $stderr, $process);
($format) = capture_stdout {
system($command, @options, $file);
};
chomp($format);
if ($format eq 'mpeg'
or $format eq 'vp9'
or $format eq 'mpeg4'
or $format eq 'cinepak'
or $format eq 'mjpeg'
or $format eq 'theora'
or $format eq 'vp8' ) {
return(1);
}
return(0);
}
Generator/search.fcgi
#!/usr/bin/perl -T
use CGI::Fast;
# use CGI::Carp qw(fatalsToBrowser warningsToBrowser);
use DBD::SQLite::Constants qw( SQLITE_OPEN_READONLY );
use DBI qw(:sql_types);
use Text::ParseWords qw(parse_line);
use HTML::Entities;
use Data::Dumper qw(Dumper);
use strict;
use warnings;
while (my $q = CGI::Fast->new) {
print("Content-Type: text/html; charset=utf-8\n\n");
print qq(<!DOCTYPE html>\n);
print qq(<html xmlns="http://www.w3.org/1999/xhtml">\n);
my $head = &head_default;
my $body;
if ( defined($q->param('clear') ) ||
! $q->param && $q->request_method() eq 'GET') {
$body = &body_default;
} elsif ( $q->param && $q->request_method() eq 'GET') {
$body = &body_search($q);
} else {
print qq(\n);
exit(1);
}
print qq(<head>\n$head\n</head>\n);
print qq(<body>\n$body\n</body>\n);
print qq(</html>\n);
}
exit(0);
sub get_facets {
my ($q) = (@_);
if (!defined($q)){
return(1);
}
if (!defined($q->param('facets'))){
}
my $facets = $q->param('facets') || return(1);
if ($facets =~ m|[^0-9]|) {
return(1);
}
( $facets ) = ( $facets =~ m|^([0-9]+)$| );
return($facets);
}
sub head_default {
my $head = <<"EOH";
<meta name="dc.description"
content="Full text and faceted searching for new and old articles at Techrights" />
<title>Techrights — Search</title>
<meta name="dc.title" content="Search Techrights" />
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
<link rel="stylesheet" href="/CSS/techrights.search.css"
media="screen" type="text/css" />
<link rel="alternate" type="application/rss+xml"
href="/feed.xml" title="Techrights" />
EOH
return($head);
}
sub print_env {
print qq(<pre>\n);
foreach my $var (sort(keys(%ENV))) {
my $val = $ENV{$var};
$val =~ s|\n|\\n|g;
$val =~ s|"|\\"|g;
print "${var}=\"${val}\"\n";
}
print qq(</pre>\n);
return(1);
}
sub body_default {
my $facets = &get_facets;
my $body = qq(
<div class="header">
<a href="/"><img src="/images/header-pillars.jpg" width="760" height="132"
title="Techrights" alt="Bonum Certa Men Certa"/></a>
</div>
<div class="search">\n);
$body .= &navigation;
$body .= qq(
<h1>Techrights Article Search</h1>);
$body .= &basic_search_form($facets, ['','','','']);
$body .= &navigation;
$body .= qq(<div class="footer">\n<ul>\n</ul>\n</div>\n);
$body .= qq(\n</div>\n);
return($body);
}
sub basic_search_form {
my ($facets, @queries) = (@_);
if ($facets < 1) {
$facets = 1;
}
my $j = 1;
my $form = qq(<form method="get">\n);
$form .= qq( <input type="submit" name="search" value="Search" )
. qq(id="submit" /> \n);
my $count = $#queries + 1;
while ($#queries >=0) {
my ($query, $set, $op, $mod) = @{ shift(@queries) };
$query = encode_entities($query, '<>&"');
my ($sa, $sm, $sb, $sc, $sd, $se) = ('') x 6; # which data set
my ($oa, $oo, $on, $ox) = ('') x 4; # which operators
if ($set eq 'any') {
$sa = 'selected';
} elsif ($set eq 'metadata') {
$sm = 'selected';
} elsif ($set eq 'body') {
$sb = 'selected';
} elsif ($set eq 'comments') {
$sc = 'selected';
} elsif ($set eq 'startdate') {
$sd = 'selected';
} elsif ($set eq 'enddate') {
$se = 'selected';
}
if ($op eq 'and') {
$oa = 'selected';
} elsif ($op eq 'or') {
$oo = 'selected';
} elsif ($op eq 'not') {
$on = 'selected';
} elsif ($op eq 'xor') {
$ox = 'selected';
}
$form .= qq(<br />\n);
$form .= qq( <label>\n);
my $size = length($query) < 20 ? 20 : length($query)+1;
$form .= qq(<input name="query$j" autocomplete="query$j"
size="$size"
id="query$j" value="$query" />\n);
$form .= qq(<select name="set$j" id="set$j">\n);
$form .= qq( <option value="any" $sa>Any</option>\n);
$form .= qq( <option value="metadata" $sm>Metadata</option>\n);
$form .= qq( <option value="body" $sb>Body</option>\n);
$form .= qq( <option value="comments" $sc>Comments (old only))
. qq(</option>\n);
$form .= qq( <option value="startdate" )
. qq($sd title="yyyy-mm-dd">Starting Date (yyyy-mm-dd)</option>\n);
$form .= qq( <option value="enddate" )
. qq($se title="yyyy-mm-dd">Ending Date (yyyy-mm-dd)</option>\n);
$form .= qq(</select> \n);
if ($#queries >= 0) {
$form .= qq(<select name="op$j" id="op$j">\n);
$form .= qq( <option value="and" $oa>AND</option>\n);
$form .= qq( <option value="or" $oo>OR</option>\n);
$form .= qq( <option value="not" $on>NOT</option>\n);
$form .= qq( <option value="xor" $ox>XOR</option>\n);
$form .= qq(</select> \n);
}
if ($count > 1) {
$form .= qq(<input type="submit" name="mod$j");
$form .= qq( value="-" id="mod$j" /> \n);
$form .= qq(<input type="submit" name="mod$j");
$form .= qq( value="+" id="mod$j" /> \n);
} else {
$form .= qq(<input type="submit" name="mod$j");
$form .= qq( value="+" id="mod$j" /> \n);
}
$form .= qq( </label>\n\n);
$j++;
}
$form .= <<"EOF";
<br />
<input type="submit" name="search" value="Search" id="submit" /> <br />
<input type="submit" name="clear" value="Clear" id="clear" /> <br />
</form>
EOF
return ($form);
}
sub navigation {
my $nav = qq(
<div class="navigation">
<ul>
<li><a href="/index.shtml">Home</a></li>
<li><a href="/about.shtml">About</a></li>
<li><a href="/irc.shtml">IRC</a></li>
<li><a href="gemini://gemini.techrights.org/">Gemini Edition</a></li>
<li><a href="/feed.xml">Feed</a></li>
</ul>
</div>\n);
return($nav);
}
sub body_search {
my ($q) = (@_);
my $facets = &get_facets;
my @queries = ();
my $i = 1;
while ( defined( $q->param("query$i") ) ) {
# lll validation needs confirmation
my $query = $q->param("query$i") || '';
if ( $query =~ m/[\x00-\x1f]/ ) {
return(&body_default);
}
$query =~ s/^\s+//;
if (!$query) {
return(&body_default);
}
my $set = $q->param("set$i") || '';
my $mod = $q->param("mod$i") || '';
my $op = $q->param("op$i") || '';
$i++;
if ($mod eq '-') {
next;
}
push(@queries, [$query, $set, $op, $mod]);
if ($mod eq '+') {
push(@queries, ['','','']);
}
}
my $body = qq(
<div class="header">
<a href="/"><img src="/images/header-pillars.jpg" width="760"
height="132" title="Techrights" alt="Bonum Certa Men Certa"/></a>
</div>\n);
$body .= &navigation;
$body .= &basic_search_form($facets, @queries);
if (defined($q->param('search'))) {
my $results = &basic_search(@queries);
$body .= $results;
}
$body .= &navigation;
$body .= qq(<div class="footer">\n<ul>\n</ul>\n</div>\n);
return($body);
}
sub basic_search {
my (@queries) = (@_);
my $database = '/var/www/techrights.org/db/tr-static-site-generator.sqlite3';
my $dbh = DBI->connect("dbi:SQLite:dbname=$database", undef, undef,
{ AutoCommit => 0, RaiseError => 1,
on_connect_do => "PRAGMA foreign_keys = ON",
sqlite_open_flags => SQLITE_OPEN_READONLY,
})
or die("Could not open database '$database': $!\n");
my @selectold = ();
my @selectnew = ();
my @prewithqueryold = ();
my @prewithquerynew = ();
my @withqueryold = ();
my @withquerynew = ();
my @opsold = ();
my @opsnew = ();
my $subtable = 0;
my $skipnew = 0;
foreach my $facet (@queries) {
my ($phrase, $set, $op, $mod) = @{$facet};
$phrase = validate_phrase($phrase);
if (! $phrase) {
next;
}
if ($set eq 'startdate' || $set eq 'enddate') {
if ( $phrase =~ m/^\d{4}-\d{2}-\d{2}$/ ) {
$phrase = qq($phrase);
} elsif ( $phrase =~ m/^\d{4}-\d{2}$/ ) {
$phrase = qq($phrase);
} elsif ( $phrase =~ m/^\d{4}$/ ) {
$phrase = qq($phrase);
} elsif ( $phrase =~ m/-/ ) {
$phrase = qq("$phrase");
}
} else {
if ( $phrase =~ m/^\d{4}-\d{2}-\d{2}$/ ) {
$phrase = qq("$phrase");
} elsif ( $phrase =~ m/^\d{4}-\d{2}$/ ) {
$phrase = qq("$phrase");
} elsif ( $phrase =~ m/-/ ) {
$phrase = qq("$phrase");
}
}
if ($set eq 'any') {
$subtable++;
push(@withqueryold, qq(
subtableold$subtable AS (
SELECT old_fts5_body.rowid AS recno
FROM old_fts5_body
WHERE old_fts5_body MATCH ?
UNION
SELECT old_fts5_metadata.rowid AS recno
FROM old_fts5_metadata
WHERE old_fts5_metadata MATCH ?
UNION
SELECT old_fts5_comments.rowid AS recno
FROM old_fts5_comments
WHERE old_fts5_comments MATCH ?
)) );
push(@withquerynew, qq(
subtablenew$subtable AS (
SELECT fts5_body.rowid AS recno
FROM fts5_body
WHERE fts5_body MATCH ?
UNION
SELECT fts5_metadata.rowid AS recno
FROM fts5_metadata
WHERE fts5_metadata MATCH ?
)) );
push(@selectold, qq(
SELECT subtableold$subtable.recno AS recno
FROM subtableold$subtable));
push(@selectnew, qq(
SELECT subtablenew$subtable.recno AS recno
FROM subtablenew$subtable));
push(@prewithqueryold , ($phrase) x 3);
push(@prewithquerynew , ($phrase) x 2);
} elsif ($set eq 'metadata') {
$subtable++;
push(@withqueryold, qq(
subtableold$subtable AS (
SELECT old_fts5_metadata.rowid AS recno
FROM old_fts5_metadata
WHERE old_fts5_metadata MATCH ?
)) );
push(@withquerynew, qq(
subtablenew$subtable AS (
SELECT fts5_metadata.rowid AS recno
FROM fts5_metadata
WHERE fts5_metadata MATCH ?
)) );
push(@selectold, qq(
SELECT subtableold$subtable.recno AS recno
FROM subtableold$subtable));
push(@selectnew, qq(
SELECT subtablenew$subtable.recno AS recno
FROM subtablenew$subtable));
push(@prewithqueryold, $phrase);
push(@prewithquerynew, $phrase);
} elsif ($set eq 'body') {
$subtable++;
push(@withqueryold, qq(
subtableold$subtable AS (
SELECT old_fts5_body.rowid AS recno
FROM old_fts5_body
WHERE old_fts5_body MATCH ?
)) );
push(@withquerynew, qq(
subtablenew$subtable AS (
SELECT fts5_body.rowid AS recno
FROM fts5_body
WHERE fts5_body MATCH ?
)) );
push(@selectold, qq(
SELECT subtableold$subtable.recno AS recno
FROM subtableold$subtable));
push(@selectnew, qq(
SELECT subtablenew$subtable.recno AS recno
FROM subtablenew$subtable));
push(@prewithqueryold, $phrase);
push(@prewithquerynew, $phrase);
} elsif ($set eq 'comments') {
$subtable++;
push(@withqueryold, qq(
subtableold$subtable AS (
SELECT old_fts5_comments.rowid AS recno
FROM old_fts5_comments
WHERE old_fts5_comments MATCH ?
)) );
# filler to make an empty set
push(@withquerynew, qq(
subtablenew$subtable AS (
SELECT keys.recno AS recno
FROM keys
WHERE false AND recno = ?
)) );
push(@selectold, qq(
SELECT subtableold$subtable.recno AS recno
FROM subtableold$subtable));
push(@selectnew, qq(
SELECT subtablenew$subtable.recno AS recno
FROM subtablenew$subtable));
push(@prewithqueryold, $phrase);
push(@prewithquerynew, $phrase);
} elsif ($set eq 'startdate') {
$subtable++;
push(@withqueryold, qq(
subtableold$subtable AS (
SELECT old_metadata.recno AS recno
FROM old_metadata
WHERE term='dc.date.created' AND value >= ?
)) );
push(@withquerynew, qq(
subtablenew$subtable AS (
SELECT metadata.recno AS recno
FROM metadata
WHERE term='dc.date.created' AND value >= ?
)) );
push(@selectold, qq(
SELECT subtableold$subtable.recno AS recno
FROM subtableold$subtable));
push(@selectnew, qq(
SELECT subtablenew$subtable.recno AS recno
FROM subtablenew$subtable));
push(@prewithqueryold, $phrase);
push(@prewithquerynew, $phrase);
} elsif ($set eq 'enddate') {
# kludge to allow includive end dates for partial dates
if (length($phrase) == 7) {
# some dates will be invalid, but that is ok because
# this is a string comparison on a string field
$phrase .= '-31';
} elsif (length($phrase) == 4) {
$phrase .= '-12-31';
}
# check through to the end of the day
$phrase .= 'T23:59';
# build sql query
$subtable++;
push(@withqueryold, qq(
subtableold$subtable AS (
SELECT old_metadata.recno AS recno
FROM old_metadata
WHERE term='dc.date.created' AND value <= ?
)) );
push(@withquerynew, qq(
subtablenew$subtable AS (
SELECT metadata.recno AS recno
FROM metadata
WHERE term='dc.date.created' AND value <= ?
)) );
push(@selectold, qq(
SELECT subtableold$subtable.recno AS recno
FROM subtableold$subtable));
push(@selectnew, qq(
SELECT subtablenew$subtable.recno AS recno
FROM subtablenew$subtable));
push(@prewithqueryold, $phrase);
push(@prewithquerynew, $phrase);
} else {
return(0);
}
if ($op eq 'and') {
push(@opsold, 'INTERSECT');
} elsif ($op eq 'or') {
push(@opsold, 'UNION');
} elsif ($op eq 'not') {
push(@opsold, 'EXCEPT');
}
if ($op eq 'and') {
push(@opsnew, 'INTERSECT');
} elsif ($op eq 'or') {
push(@opsnew, 'UNION');
} elsif ($op eq 'not') {
push(@opsnew, 'EXCEPT');
}
}
my $qold = '';
my $wqold = '';
foreach my $s (@selectold) {
my $op = shift(@opsold) || '';
$qold .= $s . "\n " . $op;
}
if (@withqueryold) {
$wqold .= "\tWITH \n " . join(",\n ", @withqueryold) . " \n\n";
}
my $queryold = qq(
SELECT old_keys.recno AS recno,
T1.value AS title,
T2.value AS date,
file
FROM old_keys
JOIN old_metadata AS T1
ON old_keys.recno = T1.recno
JOIN old_metadata AS T2
ON old_keys.recno = T2.recno
WHERE
T1.term='dc.title'
AND T2.term='dc.date.created'
AND T1.recno
IN \(
$wqold
$qold
\)
);
my $querynew = '';
if ($#selectnew >= 0) {
my $qnew = '';
my $wqnew = '';
foreach my $s (@selectnew) {
my $op = shift(@opsnew) || '';
$qnew .= $s . "\n " . $op;
}
if (@withquerynew) {
$wqnew .= "\tWITH \n " . join(",\n ", @withquerynew) . "\n\n";
}
$querynew = qq(
SELECT keys.recno AS recno,
T1.value AS title,
T2.value AS date,
CASE ballast
WHEN 0
THEN '/n/'||date||'/'||slug
ELSE '/n/'||date||'/'||slug||'.'||ballast
END file
FROM keys
JOIN metadata AS T1
ON keys.recno = T1.recno
JOIN metadata AS T2
ON keys.recno = T2.recno
WHERE
T1.term='dc.title'
AND T2.term='dc.date.created'
AND T1.recno
IN \(
$wqnew
$qnew
\)
ORDER BY date
);
}
my $query = $queryold . "\tUNION " . $querynew;
my @prewithquery = ();
push(@prewithquery, @prewithqueryold, @prewithquerynew);
my $sth = $dbh->prepare($query);
# trap errors in an eval
eval {
$sth->execute(@prewithquery);
};
# if there was an error, complain and quit, not good for production
if ($@) {
my $err = $dbh->errstr();
my $offset = $dbh->sqlite_error_offset();
$sth->finish;
$dbh->rollback;
$dbh->disconnect;
die("execute statement failed: $offset, $err\n");
}
my $results = '';
while (my $row = $sth->fetchrow_hashref) {
my $recno = $row->{'recno'} || next;
my $date = $row->{'date'} || next;
my $title = $row->{'title'} || next;
my $file = $row->{'file'} || next;
$date =~ s/[ T].*$//;
if ($file =~ m|^/n/|) {
$file =~ s|^/n/(\d{4})(\d{2})(\d{2})/|/n/$1/$2/$3/|;
$file =~ s|\.0$||;
$file .= '.shtml';
}
$results .= qq(<tr>\n);
$results .= qq( <td id="$recno">$date</td>\n);
$results .= qq( <td><a href="$file">$title</a></td>\n);
$results .= qq(</tr>\n);
}
# avoid returning an empty table
if ($results) {
$results = qq(<table class="results">\n) . $results;
$results .= qq(</table>\n);
}
my $rc = $dbh->disconnect;
return($results);
}
sub validate_phrase {
my ($input) = (@_);
my $phrase = '';
my @output = ();
my $keep = 0;
my $flag = 0;
foreach my $word (parse_line('\s+', $keep, $input)) {
if (!$word) {
next;
}
$word =~ s/^[[:punct:]]+//;
$word =~ s/"/ /g;
$word =~ s/^\s+//;
$word =~ s/\s+$//;
if ($word =~ m/\%/i or
$word =~ m/\s+/ or
$word =~ m/\x{3a}/ or
$word =~ m/\,/ or
$word =~ m/\./) {
$flag++;
}
if ($word =~ s/"/""/g) {
$flag++;
}
push(@output, $word);
}
$phrase = join(' ', @output);
if ($flag) {
$phrase = qq("$phrase");
}
return($phrase);
}
Generator/tr-old-extract-mysql-to-html-cref-comments.pl
#!/usr/bin/perl
use utf8;
use DBI;
use File::Path qw(make_path);
use URI;
use HTML::TreeBuilder::XPath;
use HTML::Entities qw(decode_entities);
use URI::Escape qw(uri_unescape);
use Config::Tiny;
use Getopt::Long;
use Data::Dumper qw(Dumper);
use open qw(:std :encoding(UTF-8));
use strict;
use warnings;
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$|);
our %opt = (
'config' => '',
'verbose' => 0,
'help' => 0,
);
GetOptions (\%opt, 'config=s', 'verbose+', 'help' );
my $config = $opt{config};
our $VERBOSE = $opt{verbose};
if ($opt{help}) {
&usage($script);
exit(0);
}
if (! -f $config) {
&usage($script);
exit(1);
} elsif (! -r $config) {
die("Configuration file '$config' is not readable\n");
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configurationn file '$config': $!\n");
our $domain = $configuration->{webserver}->{domain} || '';
my $documentroot = $configuration->{webserver}->{documentroot}
or die(" missing from configuration file\n");
my $subdirectory = $configuration->{webserver}->{subdirectory}
or die(" missing from configuration file\n");
my $database = $configuration->{database}->{database}
or die(" missing from configuration file\n");
my $username = $configuration->{database}->{username}
or die(" missing from configuration file\n");
my $password = $configuration->{database}->{password}
or die(" missing from configuration file\n");
if ($VERBOSE) {
print "DR: $documentroot\n";
print "SD: $subdirectory\n";
print "DB: $database\n";
print "U: $username\n";
if ($VERBOSE > 2) {
print "P: $password\n";
}
}
my $dsn = "DBI:mysql:$database";
# connect to MySQL database
my %attr = ( PrintError=>0, # turn off error reporting via warn()
RaiseError=>1); # turn on error reporting via die()
our $dbh = DBI->connect($dsn,$username,$password, \%attr)
or die("Could not connect to $dsn using $username and the given password:$!\n");
# ####
# find base comments
my $query = qq(SELECT comment_ID FROM wp_comments WHERE comment_parent = 0);
my $sth = $dbh->prepare($query);
$sth->execute;
my %posts = ();
my %comments = ();
my %hierarchy = ();
# build hashes of comments and comment hierarchies
while(my $row = $sth->fetchrow_hashref) {
&sql_for_comments($row, \%posts, \%comments, \%hierarchy);
}
# ####
# build hashes of previous/next navigation links
$query = qq(SELECT ID, post_date, post_name,post_title FROM wp_posts
WHERE post_type="post"
AND post_status="publish"
ORDER BY post_date, ID
);
$sth = $dbh->prepare($query);
$sth->execute();
our %prev = ();
our %next= ();
my $old = 0;
my $previousl = 0;
my $previoust = 0;
my $l = '';
my $t = '';
my $oldl = '';
my $oldt = '';
while(my $row = $sth->fetchrow_hashref) {
my $id = $row->{ID};
my $d = $row->{post_date};
my $n = $row->{post_name};
$t = $row->{post_title};
$d =~ s/ .*$//g;
$d =~ s|-|/|g;
$l = "$subdirectory/".$d.'/'.$n.'/';
print qq($id\t$t\n) if ($VERBOSE > 2);
if ($old) {
$next{$old}->{url} = $l;
$next{$old}->{title} = $t;
}
if ($previousl) {
$prev{$old}->{url} = $previousl;
$prev{$old}->{title} = $previoust;
}
$old = $id;
$previoust = $oldt;
$oldt = $t;
$previousl = $oldl;
$oldl = $l;
# print Dumper($row),"\n";
}
$next{$old}->{url} = $l;
$next{$old}->{title} = $t;
$prev{$old}->{url} = $previousl;
$prev{$old}->{title} = $previoust;
undef($old);
undef($l);
undef($t);
undef($previousl);
undef($oldl);
undef($previoust);
undef($oldt);
# ####
# convert posts to HTML
$query = qq(SELECT *, wp_posts.ID as rn FROM wp_posts
LEFT JOIN wp_users ON wp_posts.post_author = wp_users.ID
WHERE post_type="post"
AND post_status="publish" ORDER BY post_date, wp_posts.ID
);
$sth = $dbh->prepare($query);
$sth->execute();
while(my $row = $sth->fetchrow_hashref) {
# print Dumper($row),"\n";
&sql_to_html('post', $row);
}
$sth->finish();
# convert posts to HTML
$query = qq(SELECT *, wp_posts.ID as rn FROM wp_posts LEFT JOIN wp_users ON wp_posts.post_author = wp_users.ID
WHERE post_type="page"
AND post_status="publish" ORDER BY post_date, wp_posts.ID
);
$sth = $dbh->prepare($query);
$sth->execute();
while(my $row = $sth->fetchrow_hashref) {
# print Dumper($row),"\n";
&sql_to_html('page', $row);
}
$sth->finish();
$dbh->disconnect();
exit(0);
sub usage {
my ($script) = @_;
print <<EOU;
Usage: $script [hv] --c file
-c, --config full path to configuration file in usual [section] + key=value style
-v, --verbose increase debugging outout
-h, --help this output
Configuration file:
[database]
database = tr
username = archive
password = rfvtgbyhn
[webserver]
domain = techrights.org
documentroot = /var/www/techrights.org/htdocs
subdirectory = /o
The subdirectory is relative to the document root.
EOU
return(1);
}
sub sql_to_html {
my ($type, $r) = @_;
# ID post_author post_date post_date_gmt post_content post_title
# post_category post_excerpt post_status comment_status ping_status
# post_password post_name to_ping pinged post_modified post_modified_gmt
# post_content_filtered post_parent guid menu_order post_type
# post_mime_type comment_count
my @fields = qw(
display_name
post_author post_date post_date_gmt post_content post_title
post_category post_excerpt post_status comment_status ping_status
post_password post_name to_ping pinged post_modified post_modified_gmt
post_content_filtered post_parent guid menu_order post_type
post_mime_type comment_count );
print qq($r->{rn}\n) if ($VERBOSE);
my ($path, $html);
if ($type eq 'post') {
($path, $html) = &create_html($type, $r);
} elsif ( $type eq 'page' ) {
($path, $html) = &create_html($type, $r);
} else {
return(0);
}
my $fullpath = $documentroot . "$subdirectory" . $path;
print "FULLPATH= $fullpath\n" if ($VERBOSE);
if ( ! -e $fullpath ) {
make_path($fullpath,{mode=>0775})
or die("Could not create path '$fullpath' : $!\n");
print "Created directory '$fullpath'\n" if ($VERBOSE);
} elsif ( ! -d $fullpath ) {
die("Not a directory: '$fullpath'\n");
} elsif ( ! -w $fullpath ) {
die("Not writable: '$fullpath'\n");
}
my $file = $fullpath.'index.shtml';
open(my $post, '>', $file)
or die("Could not open '$file': $!\n");
print $post $html;
close($post);
return(1);
}
sub create_html {
my ($type, $r) = @_;
# /2022/05/20/kapow-1-6-0-released/
my $rn = $r->{rn};
my $post_name = $r->{post_name};
print "RN= $rn\n $post_name\n" if ($VERBOSE);
$post_name = uri_unescape($post_name);
my $path = '';
if ($type eq 'post') {
$path = $r->{post_date};
$path =~ s/ .*//;
$path =~ s|-|/|g;
$path = '/'.$path . '/' . $post_name . '/';
} elsif ($type eq 'page') {
$path = '/' . $post_name . '/';
if ($VERBOSE) {
print qq(Redirect permanent $path $path);
}
}
my $post_title = $r->{post_title};
my $post_date_gmt = $r->{post_date_gmt};
my $post_modified_gmt = $r->{post_modified_gmt};
my $pm1 = qq(\n <meta name="dc.date.modified" content="$post_modified_gmt" />\n);
my $pm2 = '';
if ($post_modified_gmt) {
$pm2 = qq(<li>Modified: $post_modified_gmt UTC</li>\n);
}
my $display_name = $r->{display_name};
my $post_excerpt = $r->{post_excerpt};
my $post_content = $r->{post_content};
$post_content =~ s|(\n\r?)\s*(\n\r?)|$1<br />$2<br />\n|gm;
if ($post_content =~ m/video/) {
$post_content = &video_masher($post_content);
}
if ($post_content =~ m/\[cref\s+\d+/m) {
$post_content = &cref_masher($post_content);
}
# make navigation previous, next navigation links for body and header
my $p = $prev{$rn}->{url} || 0;
my $n = $next{$rn}->{url} || 0;
my $pt = $prev{$rn}->{title} || 0;
my $nt = $next{$rn}->{title} || 0;
my $l = 0;
my $ll = 0;
if ($nt && $pt) {
$l = qq( <link rel="prev" href="$p" />\n <link rel="next" href="$n" />\n);
$ll = qq( ← <a href="$p">$pt</a>\n | \n <a href="$n">$nt</a> →\n);
} else {
if ($nt) {
$l = qq( <link rel="next" href="$n" />\n);
$ll = qq( <a href="$n">$nt</a> →\n);
} elsif ($pt) {
$l = qq( <link rel="prev" href="$p" />\n);
$ll = qq( ← <a href="$p">$pt</a>\n);
} else {
warn("ID: $rn\n");
}
}
my $c = &get_comments($rn, \%posts, \%comments, \%hierarchy);
my $cmnt = '';
if ($c) {
$cmnt = qq(<div class="comments">\n<h1 class="comment">Comments</h1>)
. decode_entities($c->as_XML_indented)
. qq(\n</div>\n);
}
if ($type eq 'page') {
$cmnt = '';
$l = '';
$ll = '';
}
# make actual HTML document
my $html = <<EOHTML;
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta name="generator" content="Techrights Perl-based MySQL Extractor" />
<title>$post_title</title>
<!-- $rn -->
<meta name="dc.title" content="$post_title" />
<meta name="dc.date.created" content="$post_date_gmt" /> $pm1
<meta name="dc.creator" content="$display_name" />
<meta name="dc.description" content="$post_excerpt" />
<link rel="stylesheet" href="/CSS/techrights-old.css" media="screen" type="text/css" />
<link rel="alternate" type="application/rss+xml" href="/feed.xml" title="Techrights" />
$l
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/feeds.html"-->
<!--#include virtual="/navigation.html"-->
<div class="oldpost">
<div class="navigation">
$ll
</div>
<h1>$post_title</h1>
<ul>
<li class="author">$display_name</li>
<ul class="date">
<li>$post_date_gmt UTC</li>
$pm2
</ul>
</ul>
<div>
$post_content
</div>
$cmnt
<div class="navigation">
$ll
</div>
</div>
<h1>Recent Techrights' Posts</h1>
<!--#include virtual="/latest-news.html"-->
<!--#include virtual="/footer.html"-->
</body>
</html>
EOHTML
$html =~ s/\s+<\s+/\< /gm;
$html = &miserable_unicode_hack($html);
return($path, $html);
}
sub video_masher {
my ($post_content) = @_;
# convert absolute links to relative in some of the embedded HTML
# fsize and other SSI
while ( $post_content =~
s{(?<=\<\!--)([^>]*)https?://*$domain/([^>]*)(?=--\>)}
{$1/$2}gx ) {
1;
}
# anchors
while ( $post_content =~
s{(?<=\<a)([^>]*href\s*=\s*"[^>]*)https?://*$domain/([^>]*)(?=>)}
{$1/$2}gmux ) {
1;
}
# videos
while ( $post_content =~
s{(?<=\<video)([^>]*src\s*=\s*"[^>]*)
https?://*$domain/([^>]*)(?=>)}
{$1/$2}gmux ) {
1;
}
# convert video markdown to HTML, when possible
while ( my ($v) = ( $post_content =~ m|\[video\s+([^\]]+)\]\s*\[/video\]| ) ) {
if (! $v) {
return($post_content);
}
my ($poster) = ( $v =~ m/poster\s*=\s*"([^"]+)"/ );
my ($width) = ( $v =~ m/width\s*=\s*"?([0-9]+)"?/ ); # some lack quotes
my ($height) = ( $v =~ m/height\s*=\s*"?([0-9]+)"?/ ); # some lack quotes
my ($type, $vurl) = ( $v =~ m/(ogv|mp4|webm)\s*=\s*"([^"]+)"/ );
if (! $type || ! $vurl || ! $width) {
return($post_content);
}
my $ourl = $vurl;
if ($domain) {
# convert to relative links, if possible
$vurl =~ s|^https?://*$domain/|/|;
}
my $div = HTML::Element->new('div');
$div->attr('class', 'video');
my $video = HTML::Element->new('video');
$video->attr('controls', 'controls');
$video->attr('preload', 'metadata');
if ($poster) {
if ($domain) {
# convert to relative links, if possible
$poster =~ s|^https?://*$domain/|/|;
}
$video->attr('poster', $poster);
}
if ($height) {
$video->attr('height', $height);
}
$video->attr('width', $width);
my $source = HTML::Element->new('source');
$source->attr('type', "video/$type");
$source->attr('src', $vurl);
my $anchor = HTML::Element->new('a');
$anchor->attr('href', $vurl);
$anchor->push_content($ourl);
$source->push_content($anchor);
$video->push_content($source);
$div->push_content($video);
$v = $div->as_XML_indented;
$post_content =~ s|\[video\s+[^\]]+\]\s*\[/video\]|$v|;
if ($VERBOSE) {
print "VIDEO=$v\n";
}
}
return($post_content);
}
sub cref_masher {
my ($post_content) = @_;
my $query = qq(SELECT guid,post_title FROM wp_posts WHERE ID=?);
my $sth = $dbh->prepare($query);
while ($post_content =~ m/\[cref +(\d+) +([^\]]+)\]/
or $post_content =~ m/\[cref +(\d+)\s*\]/) {
my $cref = $1;
my $anchor = $2 || '';
my $title = '';
$sth->execute($cref);
while(my $row = $sth->fetchrow_hashref) {
my $url = URI->new($row->{guid});
my $path = $url->path;
my $fragment = $url->fragment;
my $link = "$subdirectory/".$path;
if ( my $q = $url->query) {
if ($q =~ m/p=([0-9]+)$/) {
my $id = $1;
my $query2 = qq(SELECT post_date,post_name,post_title FROM wp_posts WHERE ID=?);
my $sth2 = $dbh->prepare($query2);
$sth2->execute($id);
if (my $row2 = $sth2->fetchrow_hashref) {
if (! $anchor) {
$anchor = $row2->{post_title};
}
my $u = URI->new($row2->{guid});
my $d = $row2->{post_date};
$d =~ s/ .*$//g;
$d =~ s|-|/|g;
$link = "$subdirectory/".$d.'/'.$row2->{post_name}.'/';
} else {
die;
}
} else {
$link = '?'.$q;
}
} else {
my $query2 = qq(SELECT post_date,post_name,post_title FROM wp_posts WHERE ID=?);
my $sth2 = $dbh->prepare($query2);
$sth2->execute($cref);
if (my $row2 = $sth2->fetchrow_hashref) {
if (! $title) {
$title = ' : ' . $row2->{post_title};
}
if (! $anchor) {
$anchor = $row2->{post_title};
}
my $u = URI->new($row2->{guid});
my $d = $row2->{post_date};
$d =~ s/ .*$//g;
$d =~ s|-|/|g;
$link = "$subdirectory/".$d.'/'.$row2->{post_name}.'/';
} else {
die;
}
}
if (my $fragment = $url->fragment) {
$link = '#'.$fragment;
}
$link = qq(<a href="$link" title="cref $cref$title">$anchor</a>);
$post_content =~ s/\[cref +$cref[^\]]*\]/$link/em;
}
}
$sth->finish();
return($post_content);
}
sub miserable_unicode_hack {
my ($post) = @_;
$post =~ s/á/á/gm;
$post =~ s/ÃÂ /à/gm;
$post =~ s/ã/ã/gm;
$post =~ s/ä/ä/gm;
$post =~ s/ÃÂ/ā/gm;
$post =~ s/é/é/gm;
$post =~ s/ê/ê/gm;
$post =~ s/ë/ë/gm;
$post =~ s/Ãâ/ē/gm;
$post =~ s/Ãâ/ė/gm;
$post =~ s/è/è/gm;
$post =~ s/î/î/gm;
$post =~ s/ÃÂ/í/gm;
$post =~ s/ë/ī/gm;
$post =~ s/ï/ï/gm;
$post =~ s/ü/ļ/gm;
$post =~ s/Ã
â /ņ/gm;
$post =~ s/ñ/ñ/gm;
$post =~ s/ó/ó/gm;
$post =~ s/ø/ø/gm;
$post =~ s/Ã
¡/š/gm;
$post =~ s/ü/ü/gm;
$post =~ s/Ã
«/ū/gm;
$post =~ s/ú/ű/gm;
$post =~ s/Ã
½/Ž/gm;
$post =~ s/ï¬Æ/ffi/gm;
$post =~ s/ï¬Â/fi/gm;
$post =~ s/ï¬â¬/ff/gm;
$post =~ s/Ãâ¡/ć/gm;
$post =~ s/ââ¬/€/gm; # euro
$post =~ s/ÃÂ/€/gm; # euro
$post =~ s/ââ¬ÃÂ/€/gm; # euro
$post =~ s/ââ¬Å/“/gm; # smart open quote
$post =~ s/ââ¬Â/”/gm; # smart close quote
$post =~ s/ââ¬Å/“/gm; # smart open quote
$post =~ s/ââ¬Â/”/gm; # smart close quote
$post =~ s/ââ¬â¢/’/gm; # smart close single quote
$post =~ s/ââ¬Ë/‘/gm; # smart open single quote
$post =~ s/ô/’/gm; # smart apostrophe
$post =~ s/ââ¬â/—/gm; # mdash
$post =~ s/ââ¬â/–/gm; # ndash
$post =~ s/ââ¬Â/–/gm; # hyphen
$post =~ s/ââ¬Â¢/●/gm; # list bullet
$post =~ s/ââË/⬆/gm; # fat up arrow
$post =~ s/ã/£/gm; # gbp
$post =~ s/é/™/gm; # trademark sign
$post =~ s/î/®/gm; # registered trademark
$post =~ s/ââ¬Â¦/…/gm; # ellipsis
$post =~ s/âËž/☞/gm; # manicule outline
$post =~ s/âËâº/☛/gm; # manicule solid
return($post);
}
sub get_comments {
my ($p, $posts, $comments, $hierarchy) = @_;
my $xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->implicit_tags(1);
$xhtml->no_space_compacting(0);
my $ul = HTML::Element->new('ul');
my $count = 0;
foreach my $k (@{$posts{$p}}) {
print $k,"\n" if ($VERBOSE);
my $li = HTML::Element->new('li');
$li->attr('id', "comment$k");
my $p1 = HTML::Element->new('p');
$p1->attr('class','author');
$p1->push_content($comments{$k}->{comment_author} );
$li->push_content($p1);
my $p2 = HTML::Element->new('p');
$p2->attr('class','date');
$p2->push_content($comments{$k}->{comment_date_gmt});
$li->push_content($p2);
my $div = HTML::Element->new('div');
$div->attr('class','words');
$div->push_content($comments{$k}->{comment_content});
$li->push_content($div);
$ul->push_content($li);
$count++;
my $html = &render(0, $k, $comments, $hierarchy);
if ($html) {
$ul->push_content($html);
}
}
if ($count) {
return($ul);
} else {
return(0);
}
}
sub render {
my ($layer, $k, $comments, $hierarchy) = @_;
my $comment = $hierarchy{$k};
if (!defined($comment)){
return(0);
}
$layer++;
my $ul = HTML::Element->new('ul');
my $count = 0;
foreach my $c (@{$comment}) {
my $li = HTML::Element->new('li');
$li->attr('id', "comment$c");
my $p1 = HTML::Element->new('p');
$p1->attr('class','author');
$p1->push_content($comments{$c}->{comment_author} );
$li->push_content($p1);
my $p2 = HTML::Element->new('p');
$p2->attr('class','date');
$p2->push_content($comments{$c}->{comment_date_gmt});
$li->push_content($p2);
my $div = HTML::Element->new('div');
$div->attr('class','words');
$div->push_content($comments{$c}->{comment_content});
$li->push_content($div);
$ul->push_content($li);
print "."x$layer,$c,"\n" if ($VERBOSE);
my $html = &render($layer, $c, $comments, $hierarchy);
if ($html) {
$ul->push_content($html);
$count++;
}
}
return($ul);
}
sub sql_for_comments {
my ($r, $posts, $comments, $hierarchy) = @_;
my $id = $r->{comment_ID};
# comment_ID comment_post_ID comment_author comment_author_email comment_author_url
# comment_author_IP comment_date comment_date_gmt comment_content comment_karma
# comment_approved comment_agent comment_type comment_parent
# user_id comment_subscribe
return if (! $id);
my $query = qq(
with recursive cte (comment_ID, comment_post_ID, comment_author, comment_parent,
comment_date_gmt, comment_type, comment_content) as (
select comment_ID,
comment_post_ID,
comment_author,
comment_parent,
comment_date_gmt,
comment_type,
comment_content
from wp_comments
where comment_ID = ? AND comment_approved = 1
union all
select p.comment_ID,
p.comment_post_ID,
p.comment_author,
p.comment_parent,
p.comment_date_gmt,
p.comment_type,
p.comment_content
from wp_comments p
inner join cte
on p.comment_parent = cte.comment_ID
)
SELECT *
FROM cte ORDER BY comment_date_gmt;
);
my $sth = $dbh->prepare($query);
$sth->execute($id);
while(my $row = $sth->fetchrow_hashref) {
my $cid = $row->{comment_ID};
my $parent_id = $row->{comment_parent};
my $post_id = $row->{comment_post_ID};
if ($parent_id eq 0) {
push(@{$posts{$post_id}}, $cid);
}
$comments{$cid}->{comment_post_ID} = $row->{comment_post_ID};
$comments{$cid}->{comment_parent} = $row->{comment_parent};
$comments{$cid}->{comment_author} = $row->{comment_author};
$comments{$cid}->{comment_date_gmt} = $row->{comment_date_gmt};
my $content = $row->{comment_content};
$content =~ s|(\s*)\n(\s*)\n|$1<br />\n$2<br />\n|gm;
$comments{$cid}->{comment_content} = $content;
push (@{$hierarchy{$parent_id}}, $cid);
}
$sth->finish();
return(1);
}
Generator/tr-find-deduplicate-files.pl
#!/usr/bin/perl
use File::Find;
use strict;
use warnings;
my $path = shift;
if ( ! -d $path) {
print qq("$path" is not a directory\n);
exit(1);
}
our %inodes = ();
File::Find::find({wanted => \&wanted}, $path);
exit(0);
sub wanted {
my ($dev,$inode,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks);
# print "D=$File::Find::name\n";
if ( -f $File::Find::name &&
(($dev,$inode,$mode,$nlink,$uid,$gid,$rdev,$size,
$atime,$mtime,$ctime,$blksize,$blocks) = lstat($_)) ) {
if ($inodes{$inode}++) {
print qq(Duplicate : $File::Find::name\n);
}
# print"$File::Find::name\n";
}
return(1);
}
Generator/tr-refresh-site-from-db.sh
#!/bin/sh
# 2022-07-25
PATH=/usr/local/bin:/usr/bin:/bin
conf=/etc/tr-initialize-static-site-generator.conf
documentroot=/var/www/techrights.org/htdocs
gemtextroot=/home/gemini/techrights.org
umask 0002
closure() {
test -d ${tmpdir} || exit 1
echo "Erasing temporary directories and their files."
rm -f ${tmpdir}/feed-*tmp.*
rmdir ${tmpdir}
}
cancel() {
echo "Cancelled."
closure
exit 2
}
# trap various signals to be able to erase temporary files
trap "cancel" 1 2 15
# prepare final permissions
echo "Creating temporary directories and files"
tmpdir=$(mktemp -d /tmp/refresh-tmp.XXXXXX)
chgrp techrights ${tmpdir}
chmod g=rwxs ${tmpdir}
# one file per feed
tmpfile_latest=$(mktemp -p ${tmpdir} feed-latest-tmp.XXXXXXX)
tmpfile_xhtml=$(mktemp -p ${tmpdir} feed-xhtml-tmp.XXXXXXX)
tmpfile_gemini=$(mktemp -p ${tmpdir} feed-gemini-tmp.XXXXXXX)
set -e
# create static XHTML and GemText
echo "Creating static XHTML and GemText hierarchies"
tr-extract-posts-sql.pl -c $conf \
-g -x -d $(date -d '-2 days' +"%Y%m%d") -s
# make a list of new posts for an SSI include file
echo "Updating SSI files"
tr-generate-feed.pl \
-c $conf \
-d $(date -d '-2 days' +'%Y%m%d') \
-n 15 \
-u \
-x \
> ${tmpfile_latest}
if test -s ${tmpfile_latest}; then
mv ${tmpfile_latest} ${documentroot}/latest-news.html
chmod 664 ${documentroot}/latest-news.html
fi
# write out an RSS feed for HTTP
echo "Writing the RSS feed for HTTP"
tr-generate-feed.pl \
-c $conf \
-a \
-d $(date -d '-2 days' +'%Y%m%d') \
-n 15 \
-x \
> ${tmpfile_xhtml}
if test -s ${tmpfile_xhtml}; then
mv ${tmpfile_xhtml} ${documentroot}/feed.xml
chmod 664 ${documentroot}/feed.xml
fi
# write out an Atom feed for Gemini
echo "Writing the Atom feed for Gemini"
tr-generate-feed.pl \
-c $conf \
-a \
-d $(date -d '-2 days' +'%Y%m%d') \
-n 15 \
-g \
-u \
> ${tmpfile_gemini}
if test -s ${tmpfile_gemini}; then
mv ${tmpfile_gemini} $gemtextroot/feed.xml
chmod 664 $gemtextroot/feed.xml || true
fi
# fix up the Gemini index
echo "Writing the Gemini index"
tr-generate-gemtext-index.sh
# list recent videos in Gemini index
echo "Writing the Gemini video index"
tr-gemini-latest-videos.sh
# create both Gemini and HTTP Chronological indexes
echo "Creating Chronogical Indexes for HTTP and Gemini"
tr-extract-global-index.pl -c $conf \
-x /var/www/techrights.org/htdocs/browse/ \
-g /home/gemini/techrights.org/browse/
closure
exit 0
Generator/tr-extract-global-index.pl
#!/usr/bin/perl
# See Git for history
# fetches posts from database and
# writes browsable, multi-page index
# of titles ordered by date created + date modified
use utf8;
use Getopt::Long;
use File::Path qw(make_path);
use DBI qw(:sql_types);
use Encode;
use open qw(:std :encoding(UTF-8));
use Config::Tiny;
use English;
use strict;
use warnings;
if ($ENV{'USER'} eq 'root' && ! $ENV{'EUID'}) {
print STDERR qq(Cannot run as root!\nAborting\n);
exit(1);
}
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$| );
# defaults
our $interval = 100;
our $VERBOSE = 0;
our %opt;
GetOptions (
"config|c=s" => \$opt{'c'},
"gemini:s" => \$opt{'g'},
"help" => \$opt{'h'},
"interval:i" => \$opt{'i'},
"xhtml:s" => \$opt{'x'},
"verbose+" => \$opt{'v'},
);
my $config = $opt{'c'};
if ( ! $opt{'c'} ) {
warn("Provide configuration file via the -c option.\n");
my $err = 1;
usage($script, 'sample.conf', $err);
}
if (defined($opt{'h'})) {
my $err = 0;
usage($script, $config, $err);
}
if (defined($opt{'v'})) {
$VERBOSE = $opt{'v'};
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configuration file '$config': $!\n");
my $dbname = $configuration->{database}->{name}
or die("Database name missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
my $geminiroot = $configuration->{gemini}->{geminiroot}
or die("GeminiRoot missing from configuration file\n");
my $dbfile = $serverroot . '/db/' . $dbname;
if (defined($opt{'i'}) && !$opt{'i'}) {
$interval = $opt{'i'};
}
my $xhtml_path = $serverroot . '/browse/';
my $gemtext_path = $geminiroot . '/browse/';
if (defined($opt{'g'}) && !$opt{'g'}) {
print "\nGemText path missing\n\n";
&usage($script);
} elsif (defined($opt{'g'}) && !$opt{'g'}) {
$gemtext_path = $opt{'g'} . '/browse/';
}
if (defined($opt{'x'}) && !$opt{'x'}) {
print "\nHTML path missing\n\n";
&usage($script);
} elsif (defined($opt{'x'}) && $opt{'x'}) {
$xhtml_path = $opt{'x'} . '/browse/';
}
&extract_and_write($dbfile, $xhtml_path, $gemtext_path);
exit(0);
sub usage {
my ($script, $config, $error) = @_;
print "USAGE:\n\n";
print "$script -c config [-hv] [-g path] [-x path]\n\n";
print " -c, --config path to configuraton file\n";
print " -i, --interval override default number of titles per page\n";
print " -g, --gemini override default destination path for GemText\n";
print " -x, --xhtml override default destination path for XHTML\n";
print " -v, --verbose show debugging info\n";
print "\n";
print " -h, --help show this message\n";
print "\n";
print "The -g and -x options can each be used to point to other paths\n";
print "and override the defaults:\n";
print " GemText path:\n\t$gemtext_path\n";
print " XHTML path:\n\t$xhtml_path\n";
print "\n";
if ($config eq 'sample.conf') {
print "Provide a configuration file, ";
} else {
print "Looking for config file in '$config',\n";
}
print <<EOC;
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
sub get_path {
my ($p,$default) = @_;
my $path = $default;
if ($p) {
my @directories = reverse(split(m/\//, $p));
my @canonical_path = ();
while (@directories) {
my $dir = shift @directories;
if (!length($dir)) {
next;
}
if ($dir eq ".") {
next;
}
if ($dir eq "..") {
shift @directories;
next;
}
push @canonical_path, $dir;
}
$path = '/'.join("/", reverse @canonical_path);
if ($path eq '/') {
$path = $default;
}
if (-d $path) {
if (-w $path) {
return($path);
} else {
die("The directory '$path' is not writable\n");
}
} elsif (-e $path) {
die("The destination '$path' is not a directory\n");
} else {
die("The directory '$path' does not exist\n");
}
}
return($path);
}
sub extract_and_write {
my ($dbfile, $xhtml_path, $gemtext_path) = @_;
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my $sth = &query($dbh);
$sth->execute()
or die "execute statement failed: $dbh->errstr()\n";
my @posts = ();
while (my $data = $sth->fetchrow_hashref) {
my %record = ();
my $recno = $data->{'recno'};
$record{'recno'} = $recno;
$record{'slug'} = $data->{'slug'};
$record{'ballast'} = $data->{'ballast'};
# mind the date format difference in keys and metadata tables
my $date = $data->{'date'};
$date =~ s|^([0-9]{4})([0-9]{2})([0-9]{2})$|$1/$2/$3| or die();
$record{'date'} = $date;
$record{'idate'} = $data->{'idate'};
$record{'week'} = $data->{'week'};
$record{'updated'} = $data->{'mod'};
$record{'title'} = decode('UTF-8', $data->{'title'});
push(@posts, { %record } );
}
$sth->finish;
$dbh->disconnect;
my @http_links = ();
my @gemini_links = ();
my $old_date = '';
while ( my $record = pop(@posts) ) {
# print Dumper($record);
my $recno = ${$record}{'recno'};
my $slug = decode('UTF-8', ${$record}{'slug'});
my $ballast = ${$record}{'ballast'};
my $date = ${$record}{'date'};
my $idate = ${$record}{'idate'};
my $title = ${$record}{'title'};
my $week = ${$record}{'week'};
my $updated = ${$record}{'updated'};
my ( $iso_date ) = ( $idate =~ m/^(.*)T/ );
# http / https
if ($old_date && $iso_date ne $old_date) {
push(@http_links, [1, $week, ' '] );
push(@gemini_links, [1, $week, ' '] );
}
my $xlink = &xhtml_link($title, $date, $idate,
$slug, $ballast, $updated);
push(@http_links, [$updated, $week, $xlink] );
# gemini
my $glink = &gemtext_link($title, $date, $idate,
$slug, $ballast, $updated);
push(@gemini_links, [$updated, $week, $glink] );
$old_date = $iso_date;
}
$xhtml_path = &get_path($opt{'x'}, $xhtml_path);
$gemtext_path = &get_path($opt{'g'}, $gemtext_path);
&prepare_directory($xhtml_path);
&prepare_directory($gemtext_path);
&write_html($xhtml_path, @http_links);
&write_gemtext($gemtext_path, @gemini_links);
return(1);
}
sub query {
my ($dbh) = @_;
my $sth; # Statement handle object
# list posts twice if modified at least a day from the creation date
# the week calculation is probably unnecesary and could be removed
my $query = qq(
SELECT t1.recno AS recno,
printf('%04d %02d',
strftime('%Y', t2.value),
strftime('%W', t2.value)) AS week,
t1.value AS title,
t2.value AS idate,
CASE
WHEN unixepoch(t2.value) - unixepoch(t3.value) > 86400
THEN 1
ELSE 0
END mod,
t4.date,
t4.ballast,
t4.slug
FROM metadata AS t1
INNER JOIN metadata AS t2
ON t1.recno = t2.recno
AND t1.term = 'dc.title'
AND t2.term = 'dc.date.modified'
INNER JOIN metadata AS t3
ON t1.recno = t3.recno
AND t3.term = 'dc.date.created'
INNER JOIN keys AS t4
ON t1.recno = t4.recno
WHERE mod > 0
UNION
SELECT
t5.recno AS recno,
printf('%04d %02d',
strftime('%Y', t6.value),
strftime('%W', t6.value)) AS week,
t5.value AS title,
t6.value AS idate,
0,
t7.date,
t7.ballast,
t7.slug
FROM metadata AS t5
INNER JOIN metadata AS t6
ON t5.recno = t6.recno
AND t5.term = 'dc.title'
AND t6.term='dc.date.created'
INNER JOIN keys AS t7
ON t5.recno = t7.recno
ORDER BY idate DESC;
);
if ($VERBOSE > 1) {
print "Main Query= $query\n";
}
$sth = $dbh->prepare($query);
return($sth);
}
sub xhtml_link {
my ($title, $date, $idate, $slug, $ballast, $updated) = @_;
# should this be the date modified or date created?
my ( $time ) = ( $idate =~ m/T(\d\d:\d\d)/ );
my ( $iso_date ) = ( $idate =~ m/^(.*)T/ );
$iso_date =~ s|/|-|g;
# lll
my $href;
if (! $ballast) {
$href = '/n/'.$date.'/'.$slug.'.shtml';
} else {
$href = '/n/'.$date.'/'.$slug.".$ballast.shtml";
}
if ($updated) {
$title .= ' [updated]';
}
my $link = qq(<span class="date">$iso_date <sup>$time</sup></span> )
. qq(<a href="$href">$title</a>);
return($link);
}
sub gemtext_link {
my ($title, $date, $idate, $slug, $ballast, $updated) = @_;
# should this be the date modified or date created?
my $iso_date = $idate;
$iso_date =~ s|/|-|g;
$iso_date =~ s|T.*$||;
my $href;
if (! $ballast) {
$href = '/n/'.$date.'/'.$slug.'.shtml';
} else {
$href = '/n/'.$date.'/'.$slug.".$ballast.shtml";
}
if ($updated) {
$title .= ' [updated]';
}
my $link = qq(=> $href $iso_date $title);
return($link);
}
sub write_html {
my ($xhtml_path, @http_links) = @_;
if ($opt{'v'}) {
print $xhtml_path,"\n\n";
}
my $count = 0;
my $page = 1;
my @buffer = ();
my $size = length(int(($#http_links + 1)));
my $file = '';
my $first = '';
my $link = '';
my $old_week = '';
while ( $#http_links >= 0 ) {
my $row = shift(@http_links);
my ( $updated, $week, $link ) = @$row;
# don't start a page with an empty row
if ( $#buffer >= 0 || $link =~ m/<a/ ) {
push (@buffer, $link);
if ( ! $updated && $link =~ m/<a/ ){
$count++;
}
} else {
next;
}
if ( $count >= $interval && $week ne $old_week) {
# don't end a page with an empty row
if ( $link !~ m/<a/ ) {
pop(@buffer);
}
my ( $prevlink, $nextlink ) = &prevnexthtml($page,
$size, $#http_links);
my $xhtml = &xhtml_document($page, $interval,
$prevlink, $nextlink, @buffer);
$file = sprintf("%s/page-%0${size}d.shtml",
$xhtml_path, $page);
if ( $opt{'v'} ) {
print "$file\n";
}
&save_html_file($file, $xhtml);
if (!$first) {
$first = $file;
my $firstfile = $xhtml_path.'/index.shtml';
if ( -l $firstfile ) {
unlink($firstfile) or die();
}
symlink($first, $firstfile) or die();
}
@buffer = ();
$page++;
}
$old_week = $week;
}
if ( $#buffer >= 0 ) {
my ( $prevlink, $nextlink ) = &prevnexthtml($page, $size, -1);
my $xhtml = &xhtml_document($page, $interval,
$prevlink, $nextlink, @buffer);
$file = sprintf("%s/page-%0${size}d.shtml", $xhtml_path, $page);
if (!$first) {
$first = $file;
my $firstfile = $xhtml_path.'/index.shtml';
if ( -l $firstfile ) {
unlink($firstfile) or die();
}
symlink($first, $firstfile) or die();
}
&save_html_file($file, $xhtml);
if ( $opt{'v'} ) {
print "$file\n";
}
}
if ( $opt{'v'} ) {
print qq(Last = $file\n);
}
my $lastfile = $xhtml_path.'/latest.shtml';
if ( -l $lastfile ) {
unlink($lastfile) or die();
}
symlink($file, $lastfile) or die();
return(1);
}
sub prevnexthtml {
my ($page, $size, $more) = @_;
my ($prevlink, $nextlink) = ('','');
if ( $page > 2 ) {
$prevlink = sprintf("/browse/page-%0${size}d.shtml", $page - 1);
$prevlink = qq(<a href="$prevlink">Page ). ($page-1) .qq(</a>);
} elsif ( $page == 2 ) {
$prevlink = qq(/browse/index.shtml);
$prevlink = qq(<a href="$prevlink">Page 1</a>);
}
if ( $more >= 0 ) {
$nextlink = sprintf("/browse/page-%0${size}d.shtml", $page+1);
$nextlink = qq(<a href="$nextlink">Page ).($page+1).qq(</a>);
}
return($prevlink, $nextlink);
}
sub xhtml_document {
my ($page, $interval, $prevlink, $nextlink, @buffer) = @_;
my $title = "Chronological Index, Page ". $page;
my $posts = '<li>'.join("</li>\n\t<li>", @buffer).'</li>';
my $xhtml = <<"EOHTML";
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$title</title>
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
<link rel="stylesheet" href="/CSS/techrights.index.css"
media="screen" type="text/css" />
<link rel="alternate" type="application/rss+xml" href="/feed.xml"
title="Techrights" />
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/feeds.html"-->
<div class="archive">
<!--#include virtual="/navigation.html"-->
<div class="navigation2">
<ul>
<li>$prevlink</li>
<li>$nextlink</li>
</ul>
</div>
<h1>$title</h1>
<ul class="archive">
$posts
</ul>
<p>
Time in UTC
</p>
</div>
<div class="navigation2">
<ul>
<li>$prevlink</li>
<li>$nextlink</li>
</ul>
</div>
<!--#include virtual="/footer.html"-->
</body>
</html>
EOHTML
return ($xhtml);
}
sub save_html_file {
my ($file, $xhtml) = @_;
my $doc;
# $xhtml = decode('UTF-8',$xhtml);
# $xhtml = encode('UTF-8',$xhtml);
open($doc, '>', $file)
or die("Could not open '$file' for writing: $!\n");
print $doc $xhtml;
close($doc);
return(1);
}
sub write_gemtext {
my ($gemtext_path, @gemini_links) = @_;
if ($opt{'v'}) {
print $gemtext_path,"\n\n";
}
my $count = 0;
my $page = 1;
my @buffer = ();
my $size = length(int(($#gemini_links + 1)));
my $file = '';
my $first = '';
my $link = '';
my $old_week = '';
while ( $#gemini_links >= 0 ) {
my $row = shift(@gemini_links);
my ( $updated, $week, $link ) = @$row;
# don't start a page with an empty row
if ( $#buffer >= 0 || $link =~ m/^\=\>/ ) {
push (@buffer, $link);
if ( ! $updated && $link =~ m/^\=\>/ ) {
$count++;
}
} else {
next;
}
if ( $count >= $interval && $week ne $old_week ) {
my ( $prevlink, $nextlink ) = &prevnextgemtext($page, $size,
$#gemini_links);
my $gemtext = &gemtext_document($page,
$prevlink, $nextlink, @buffer);
$file = sprintf("%s/page-%0${size}d.gmi", $gemtext_path, $page);
if ( $opt{'v'} ) {
print "$file\n";
}
&save_gemtext_file($file, $gemtext);
if (!$first) {
$first = $file;
my $firstfile = $gemtext_path.'/index.gmi';
if ( -l $firstfile ) {
unlink($firstfile) or die();
}
symlink($first, $firstfile) or die();
}
@buffer = ();
$page++;
}
$old_week = $week;
}
if ( $#buffer >= 0 ) {
my ( $prevlink, $nextlink ) = &prevnextgemtext($page, $size, -1);
my $gemtext = &gemtext_document($page,
$prevlink, $nextlink, @buffer);
$file = sprintf("%s/page-%0${size}d.gmi", $gemtext_path, $page);
if ( $opt{'v'} ) {
print "$file\n";
}
if (!$first) {
$first = $file;
my $firstfile = $gemtext_path.'/index.gmi';
if ( -l $firstfile ) {
unlink($firstfile) or die();
}
symlink($first, $firstfile) or die();
}
&save_gemtext_file($file, $gemtext);
}
if ( $opt{'v'} ) {
print qq(Last = $file\n);
}
my $lastfile = $gemtext_path.'/latest.gmi';
if ( -l $lastfile ) {
unlink($lastfile) or die();
}
symlink($file, $lastfile) or die();
return(1);
}
sub prevnextgemtext {
my ($page, $size, $more) = @_;
my ($prevlink, $nextlink) = ('','');
if ( $page > 2 ) {
$prevlink = sprintf("/browse/page-%0${size}d.gmi", $page-1);
$prevlink = qq(=> $prevlink Page ). ($page - 1);
} elsif ( $page == 2 ) {
$prevlink = qq(/browse/index.gmi);
$prevlink = qq(=> $prevlink Page 1);
}
if ( $more >= 0 ) {
$nextlink = sprintf("/browse/page-%0${size}d.gmi", $page +1);
$nextlink = qq(=> $nextlink Page ).($page+1);
}
return($prevlink, $nextlink);
}
sub gemtext_document {
my ($page, $prevlink, $nextlink, @buffer) = @_;
my $title = "Chronological Index, Page $page";
my $posts = join("\n", @buffer);
my $gemtext = <<"EOGEMTEXT";
Techrights
# $title
$nextlink
$prevlink
$posts
Time in UTC.
$nextlink
$prevlink
=> / gemini.techrights.org
EOGEMTEXT
return ($gemtext);
}
sub save_gemtext_file {
my ($file, $gemtext) = @_;
my $doc;
open($doc, '>', $file)
or die("Could not open '$file' for writing: $!\n");
print $doc $gemtext;
close($doc);
return(1);
}
sub prepare_directory {
my ($path) = @_;
if ( -e $path) {
if ( ! -d $path) {
warn "Target already exists but is not a directory: '$path'\n";
return(0);
}
if ( ! -w $path) {
print STDERR "Target is not a writable: '$path'\n";
return(0);
}
# path exists and is writable
return(1);
} else {
make_path($path,{mode=>0775})
or die("Could not create path '$path' : $!\n");
print "Created directory '$path'\n" if ($VERBOSE);
return(1);
}
}
sub is_file_writable {
my ($file) = @_;
# overwrite by default
if (-e $file) {
if (-f $file) {
if (-w $file) {
return(1);
} else {
warn("Destination '$file' is not writable\n");
return(0);
}
} else {
warn("Destination '$file' is not a regular file\n");
return(0);
}
} else {
return(1);
}
}
Generator/tr-update-and-refresh-from-db.sh
#!/bin/sh # 2022-07-26 PATH=/usr/local/bin:/usr/bin:/bin case $USER in 'tuxmachines') author='Tux Machines' ;; 'roy') author='Roy Schestowitz' ;; 'rianne') author='Rianne Schestowitz' ;; 'marius') author='Marius Nestor' ;; *) author=$USER ;; esac # update a record either by URL or by RecordNumber tr-update-entry-sql.pl -c /etc/tr-initialize-static-site-generator.conf \ -u $@ || tr-update-entry-sql.pl -r $@ # update both the XHTML and Gemtext hierarchies tr-refresh-site-from-db.sh exit 0
Generator/tr-stats-weekly-pages-cron.sh
#!/bin/sh
# wrapper script for tr-stats-weekly-pages.pl
PATH=/usr/local/bin:/usr/bin:/bin
set -e
# sort gzipped log files nummerically so that the --sort option
# can be used to reduce run duration by ensuring that the log
# data is fed to the perl script in chronological order (as much as feasible)
# the perl one-liner is to remove the status column, if present
readlog() {
base=$1
log=$2
( cat /var/log/apache2/${base}-access.log \
/var/log/apache2/${base}-access.log.1;
zcat $( ls /var/log/apache2/${base}-access.log*z \
| sort -t . -k 3,3n ) ) \
| tr-stats-weekly-pages.pl --table --sorted --status 200,304 \
| perl -p -e 's|</td>\s+<td>\d{3}</td>\s+|</td>\t|;' \
> /var/log/${log}
}
readlog techrights tr-stats.log
readlog tuxmachines tm-stats.log
exit 0
Generator/.directory-listing-ok
Generator/tr-extract-summary.pl
#!/usr/bin/perl
# 2023-01-25
# fetches posts from the database and makes an HTML DL list based
# on author and title with the description, grouped by date
use utf8;
use Getopt::Long;
use Date::Calc qw/Today Add_Delta_YM Add_Delta_YMD/;
use DBI qw(:sql_types);
use HTML::TreeBuilder::XPath;
use HTML::Entities qw/encode_entities_numeric decode_entities/;
use Config::Tiny;
use English;
use strict;
use warnings;
our %opt;
our $VERBOSE = 0;
GetOptions ("config=s" => \$opt{'c'},
"date=s" => \$opt{'d'},
"help" => \$opt{'h'},
"verbose+" => \$opt{'v'},
);
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$| );
my $config = $opt{'c'};
if ( ! $opt{'c'} ) {
warn("Provide configuration file via the -c option.\n");
my $err = 1;
usage($script, 'sample.conf', $err);
}
if (defined($opt{'h'})) {
my $err = 0;
&usage($script, $config, $err);
}
if (defined($opt{'v'})) {
$VERBOSE = $opt{'v'};
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configuration file '$config': $!\n");
my $dbname = $configuration->{database}->{name}
or die("Database name missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
my $dbfile = $serverroot . '/db/' . $dbname;
my ($year, $month, $day) = &get_date($opt{'d'});
$opt{'s'} = 1;
if ($opt{'s'}) {
print "Starting Date: $year/$month/$day\n" if ($VERBOSE);
} else {
print "Date: $year/$month/$day\n" if ($VERBOSE);
}
&extract_and_write($dbfile, $year,$month,$day);
exit(0);
sub usage {
my ($script, $config, $error) = @_;
print "USAGE:\n\n";
print "$script -c config [-hv] [-d date]\n\n";
print " -c, --config path to configuration file\n";
print " -d, --date date as YYYYMMDD, defaults to a month ago\n";
print " -v, --verbose show debugging info\n";
print " -h, --help show this message\n";
print "\n";
print "Summmarize posts by title and author, grouped by date, since ";
print "the designated date. If no date is given, then start from ";
print "one month ago.\n";
print "\n";
if ($config eq 'sample.conf') {
print "Provide a configuration file, ";
} else {
print "Looking for config file in '$config',\n";
}
print <<EOC;
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
# validate and return date from option XOR current date minus one month
sub get_date {
my ($d) = @_;
my ($year, $month, $day);
my $date = '';
if ($d) {
if ( ($date) = ($d =~ m/^([0-9]{4}-[0-9]{2}-[0-9]{2})$/)
or
($date) = ($d =~ m/^([0-9]{4}[0-9]{2}[0-9]{2})$/)
) {
$date =~ s/-//g;
}
if (!$date) {
print STDERR qq(Invalid date '$d'\n);
exit(1);
}
($year,$month,$day) =
($date =~ m/^([0-9]{4})([0-9]{2})([0-9]{2})$/);
if (! check_date($year,$month,$day)) {
print STDERR qq(Invalid date '$date', );
print STDERR qq(Use YYYY-MM-DD'\n);
exit(1);
}
}
# if no date was provide, start from a month ago
if (!$date) {
($year,$month,$day) = Today(1); # get date GMT
($year,$month,$day) = Add_Delta_YM($year,$month,$day,0,-1);
($year,$month,$day) = Add_Delta_YMD($year,$month,$day,0,0,1);
$year = sprintf("%04d", $year);
$month = sprintf("%02d", $month);
$day = sprintf("%02d", $day);
}
return($year, $month, $day);
}
# get the relevant records from the database and convert to HTML
sub extract_and_write {
my ($dbfile, $year,$month,$day) = @_;
my $summary = &extract($dbfile, $year,$month,$day);
if (!$summary) {
$summary = qq(<p>No records since $year-$month-$day</p>\n);
}
my $html = &new_xhtml_document($year,$month,$day,$summary);
print $html;
return(1);
}
# get the relevant records from the SQLite3 database
sub extract {
my ($dbfile, $year,$month,$day) = @_;
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1 })
or die("Could not open database '$dbfile': $!\n");
my $date = "$year-$month-$day";
# fetch relevant records, starting with specified date
my $sth = &query($date, $dbh);
# process found records into a sortable hash
my $count = 0;
my %record = ();
while (my $data = $sth->fetchrow_hashref) {
my $recno = $data->{'recno'};
my $date = substr($data->{'ts'},0,10);
my $timestamp = $data->{'ts'};
my $author = $data->{'author'};
my $title = $data->{'title'};
my $description = $data->{'description'};
$record{$recno}->{'date'} = $date;
$record{$recno}->{'timestamp'} = $timestamp;
$record{$recno}->{'author'} = $author;
$record{$recno}->{'title'} = $title;
$record{$recno}->{'description'} = $description;
my $ballast = $data->{'ballast'};
my $slug = $data->{'slug'};
my $file;
if (!$ballast) {
$file = "$date$slug.shtml";
} else {
$file = "$date/$slug.$ballast.shtml";
}
$file =~ s{^([0-9]{4})-([0-9]{2})-([0-9]{2})} {$1/$2/$3/};
$record{$recno}->{'href'} = '/n/'.$file;
# number of records processed
$count++;
}
$sth->finish;
$dbh->disconnect;
my $oldDate = 0;
my $ddSummary = HTML::Element->new('dd'); # actual day
my $daySummary = HTML::Element->new('dl'); # wrapper for each day
my $summary = HTML::Element->new('dl'); # grand list of days
# sort hash of processed records and build HTML definition list(s)
for my $rec (sort {$record{$a}->{'date'} cmp $record{$b}->{'date'}
or $record{$a}->{'author'} cmp $record{$b}->{'author'}
or $record{$a}->{'timestamp'} cmp $record{$b}->{'timestamp'}
or $a cmp $b } keys %record) {
my $author = $record{$rec}->{'author'};
my $title = $record{$rec}->{'title'};
my $description = $record{$rec}->{'description'};
my $date = $record{$rec}->{'date'};
my $timestamp = $record{$rec}->{'timestamp'};
my $href = $record{$rec}->{'href'};
if ($VERBOSE) {
print "$rec: $date, $timestamp: $author\n";
print "\t$href\n";
}
# beginning of new day
if ($oldDate ne $date) {
$ddSummary->push_content($daySummary);
$summary->push_content($ddSummary);
# clear the buffers for each day and the day wrapper
$daySummary = HTML::Element->new('dl');
$ddSummary = HTML::Element->new('dd');
# add a defninition list title for the next date
my $dt = HTML::Element->new('dt');
$dt->push_content($date);
$summary->push_content($dt);
# remember working date
$oldDate = $date;
}
# build entry hyperlink to article
my $anchor = HTML::Element->new('a', 'href'=>$href);
$anchor->push_content($title);
my $dt = HTML::Element->new('dt'); # entry hyperlink + title
my $dd1 = HTML::Element->new('dd'); # entry author + description
$dt->push_content($anchor);
$dd1->push_content($author." : ".$description);
# add link+title, author+description to list for working date
$daySummary->push_content($dt);
$daySummary->push_content($dd1);
}
# harvest any remaining buffer content from the day and then its wrapper
$ddSummary->push_content($daySummary);
$summary->push_content($ddSummary);
if (!$count) {
if ($VERBOSE) {
print "No records processed.\n\n";
}
return(" <p>No records processed.</p>\n");
}
# convert to indented HTML with closing tags for each element
my $summaryhtml = $summary->as_HTML( '', ' ', {} );
$summary->delete;
return($summaryhtml);
}
# actually query the SQLite3 daabawse
sub query {
my ($date, $dbh) = @_;
# $sth Statement handle object
my $sth;
# ts = full datetime stamp
# find date modified, author, title, description, and file name parts
my $query = qq(
SELECT recno, ts, author, title, description, ballast, slug
FROM (
SELECT recno, value AS ts
FROM metadata
WHERE term='dc.date.modified'
AND value>=?) AS T1
JOIN (
SELECT recno, value AS author
FROM metadata
WHERE term='dc.creator') AS T2
USING(recno)
JOIN (
SELECT recno, value AS title
FROM metadata
WHERE term='dc.title') AS T3
USING(recno)
JOIN (
SELECT recno, value AS description
FROM metadata
WHERE term='dc.description') AS T4
USING(recno)
JOIN (
SELECT recno, ballast, slug FROM keys ) AS T5
USING(recno)
ORDER BY SUBSTR(ts,1,10), author, ts desc;
);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute($date)
or die "execute statement failed: $dbh->errstr()\n";
if ($VERBOSE > 1) {
print "Main Query= $query\n";
}
return($sth);
}
# fill in a template to create an HTML page
sub new_xhtml_document {
my ($year,$month,$day,$summary) = @_;
my $html = <<"EOHTML";
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Techrights posts since $year-$month-$day</title>
<link rel="stylesheet" href="/CSS/techrights.css"
media="screen" type="text/css" />
<link rel="alternate" type="application/atom+xml" href="/feed.xml"
title="Techrights" />
<!--
<link rel="shortcut icon" href="/i/whitejazz_favicon_0.ico"
type="image/x-icon" /></head>
-->
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/feeds.html"-->
<div class="monthly">
<!--#include virtual="/navigation.html"-->
<h1>Techrights posts since $year-$month-$day</h1>
$summary
</div>
<!--#include virtual="/footer.html"-->
</body>
</html>
EOHTML
return($html);
}
Generator/tr-parse-old-static-html.pl
#!/usr/bin/perl
use utf8;
use Getopt::Long;
use Cwd qw(abs_path);
use File::Find qw(find);
use File::Glob qw(:bsd_glob);
use HTML::TreeBuilder::XPath;
use DBI qw(:sql_types); # sqlite3
use English;
use strict;
use warnings;
my $dbfile = q(/var/www/techrights.org/db/tr-static-site-generator.sqlite3);
# my $dbfile = q(/tmp/generator.sqlite3);
our %opt = (
'documentroot' => '',
'verbose' => 0,
'help' => 0,
);
GetOptions (
"documentroot|d=s" => \$opt{'documentroot'}, # flag
"help|h" => \$opt{'help'}, # flag
"verbose|v+" => \$opt{'verbose'}, # flag, multiple settings
);
my ($script) = ($0 =~ m|([^/]+)$|);
if ($opt{'help'}) {
&usage($script);
}
if (not $opt{'documentroot'} or not -d $opt{'documentroot'}) {
&usage($script, 'missing valid --documentroot');
} else {
# remove trailing slash from path
$opt{'documentroot'} =~ s|/$||;
}
my @filenames;
while (my $file = shift) {
my @files = bsd_glob($file);
foreach my $f (@files) {
if ($f eq abs_path($f)) {
push(@filenames, $f);
} else {
$f =~ s|^/+||;
$f = $opt{'documentroot'} .'/'. $f;
if ( -e $f) {
push(@filenames, $f);
} else {
print qq(Bad file or path: $f\n);
}
}
}
}
if($#filenames < 0) {
&usage($script);
}
our %files;
&find_files(@filenames);
my ($recnos, $bodies, $comments, $metadata) = &read_files();
&write_to_database($dbfile, $recnos, $metadata, $bodies, $comments);
exit(0);
sub usage {
my ($script, $reason) = @_;
print qq($reason\n);
if ($reason) {
exit(1);
}
exit(0);
}
sub find_files {
my (@files) = @_;
for my $file (@files) {
print qq(F=$file\n);
if (! $file ) {
next;
}
File::Find::find({wanted => \&wanted}, $file);
}
return(1);
}
sub wanted {
if ($File::Find::name =~ m|\.shtml$|) {
# print "D=$File::Find::name\n";
$files{$File::Find::name}++;
return($File::Find::name);
}
return(0);
}
sub read_files {
my ($_recnos, $_bodies, $_comments, $_metadata) = @_;
my %recnos = %{$_recnos};
my %bodies = %{$_bodies};
my %comments = %{$_comments};
my %metadata = %{$_metadata};
my $counter = 0; # llll
for my $f (sort keys %files) {
my $xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->store_comments(1);
$xhtml->implicit_tags(1);
$xhtml->parse_file($f)
or die("Could not parse '$f' : $!\n");
my ($recno, $rawtext_body, $rawtext_comments, %m) = &parse_file($xhtml);
$recnos{$f} = $recno;
$metadata{$f} = {%m};
$bodies{$f} = $rawtext_body;
$comments{$f} = $rawtext_comments;
$xhtml->delete;
last if ($counter++ == 1000);
}
return(\%recnos, \%bodies, \%comments, \%metadata);
}
sub parse_file {
my ($xhtml) = @_;
my %file_metadata = ();
for my $title($xhtml->findnodes('//title')) {
push(@{$file_metadata{'dtitle'}}, $title->as_text);
}
my $recno = 0;
for my $r ($xhtml->findnodes('//head/comment()')) {
($recno) = ($r->as_XML =~ m/(\d+)/);
}
FieldLoop:
for my $field ($xhtml->findnodes('//meta[@name and @content]')) {
if ($field->{'name'} !~ m|^dc\.|) {
next;
}
if (! $field->{'content'}) {
next;
}
my $term = $field->{'name'};
my $value = $field->{'content'};
for my $t (@{$file_metadata{$term}}) {
if ($value eq $t) {
next FieldLoop;
}
}
push( @{$file_metadata{$term}}, $value );
}
my $rawtext_body ='';
my $rawtext_comments='';
for my $body ($xhtml->findnodes('//div[@class="oldpost"]')) {
for my $nav ($xhtml->findnodes('//div[@class="navigation"]')) {
$nav->delete;
}
for my $comments ($body->findnodes('//div[@class="comments"]')) {
for my $h1 ($comments->findnodes('h1[@class="comment"]')) {
$h1->delete;
}
$rawtext_comments = $comments->format;
$comments->delete;
}
$rawtext_body = $rawtext_body . $body->format;
}
return($recno, $rawtext_body, $rawtext_comments, %file_metadata);
}
sub write_to_database {
my ($dbfile, $recnos, $metadata, $bodies, $comments) = @_;
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 1,
on_connect_do => "PRAGMA foreign_keys = ON",
})
or die("Could not open database '$dbfile': $!\n");
&initialize_db($dbh);
&write_filenames_to_database($dbh, $recnos, $metadata);
&write_metadata_to_database($dbh, $metadata);
&write_bodies_to_database($dbh, $bodies);
&write_comments_to_database($dbh, $comments);
$dbh->disconnect;
return(1);
}
sub initialize_db {
my ($dbh) = @_;
print qq(Intitializing db\n);
my @queries = (
qq(DROP TABLE IF EXISTS "old_keys"),
qq(DROP TABLE IF EXISTS "old_metadata"),
qq(DROP TABLE IF EXISTS "old_rawtext_body"),
qq(DROP TABLE IF EXISTS "old_rawtext_comments"),
qq(DROP TABLE IF EXISTS "old_rawtext_metadata"),
qq(DROP TABLE IF EXISTS "old_fts5_body"),
qq(DROP TABLE IF EXISTS "old_fts5_comments"),
qq(DROP TABLE IF EXISTS "old_fts5_metadata"),
qq(DROP TRIGGER IF EXISTS rawtext_insert_b),
qq(DROP TRIGGER IF EXISTS rawtext_update_b),
qq(DROP TRIGGER IF EXISTS rawtext_delete_b),
qq(DROP TRIGGER IF EXISTS rawtext_insert_c),
qq(DROP TRIGGER IF EXISTS rawtext_update_c),
qq(DROP TRIGGER IF EXISTS rawtext_delete_c),
qq(DROP TRIGGER IF EXISTS rawtext_insert_m),
qq(DROP TRIGGER IF EXISTS rawtext_update_m),
qq(DROP TRIGGER IF EXISTS awtext_delete_m),
qq(CREATE TABLE IF NOT EXISTS "old_keys" (
recno integer not null primary key,
file varchar(256) not null)),
qq(CREATE TABLE IF NOT EXISTS "old_metadata"(
recno integer,
term varchar(25) not null,
value varchar(256) not null)),
qq(CREATE TABLE IF NOT EXISTS "old_rawtext_body"(
recno integer primary key unique,
fulltext text not null)),
qq(CREATE TABLE IF NOT EXISTS "old_rawtext_comments"(
recno integer primary key unique,
fulltext text not null)),
qq(CREATE TABLE IF NOT EXISTS "old_rawtext_metadata"(
recno integer primary key unique,
fulltext text not null)),
qq(CREATE VIRTUAL TABLE IF NOT EXISTS "old_fts5_body" USING FTS5(
fulltext,
content=old_rawtext_body,
content_rowid=recno)),
qq(CREATE VIRTUAL TABLE IF NOT EXISTS "old_fts5_comments" USING FTS5(
fulltext,
content=old_rawtext_comments,
content_rowid=recno)),
qq(CREATE VIRTUAL TABLE IF NOT EXISTS "old_fts5_metadata" USING FTS5(
fulltext,
content=old_rawtext_metadata,
content_rowid=recno)),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_insert_b
AFTER INSERT ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_update_b
AFTER UPDATE ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(old_fts5_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_body(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_delete_b
AFTER DELETE ON old_rawtext_body BEGIN
INSERT INTO old_fts5_body(old_fts_body, rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_insert_c
AFTER INSERT ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_update_c
AFTER UPDATE ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(old_fts5_comments,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_comments(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_delete_c
AFTER DELETE ON old_rawtext_comments BEGIN
INSERT INTO old_fts5_comments(old_fts5_comments,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_insert_m
AFTER INSERT ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_update_m
AFTER UPDATE ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(old_fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
INSERT INTO old_fts5_metadata(rowid, fulltext)
VALUES (new.recno, new.fulltext);
END;),
qq(CREATE TRIGGER IF NOT EXISTS rawtext_delete_m
AFTER DELETE ON old_rawtext_metadata BEGIN
INSERT INTO old_fts5_metadata(old_fts5_metadata,
rowid, fulltext)
VALUES('delete', old.recno, old.fulltext);
END;),
);
my $sth;
foreach my $query (@queries) {
if ($opt{'verbose'} > 2) {
print qq(Q: $query\n\n);
}
$sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute
or die("execute statement failed: $dbh->errstr()\n");
}
$dbh->commit;
$sth->finish;
return(1);
}
sub write_filenames_to_database {
my ($dbh, $_recnos, $_metadata) = @_;
my %metadata = %{$metadata};
my $recnos = %{$_recnos};
for my $file (sort keys %metadata) {
# the key for the record number is the full, absolute path
my $recno = $$recnos{$file};
$file =~ s|^$opt{'documentroot'}||;
my $query = qq(INSERT OR REPLACE INTO
old_keys(recno, file) VALUES(?, ?));
my $sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth->execute($recno, $file)
or die("execute statement failed: $dbh->errstr()\n");
$sth->finish;
}
$dbh->commit;
return(1);
}
sub write_metadata_to_database {
my ($dbh, $_metadata) = @_;
my %metadata = %{$_metadata};
my $query = qq(SELECT recno FROM old_keys WHERE file = ?);
my $sth = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
for my $absfile (sort keys %{$metadata}) {
# the first-level key for the metadata hash of hashes
# is the full, absolute path
my $file = $absfile;
$file =~ s|$opt{'documentroot'}||;
# start by retrieving the record number for the file
$sth->execute($file)
or die("execute statement failed: $dbh->errstr()\n");
my $row = $sth->fetchrow_hashref or next;
my $recno = $row->{'recno'} or next;
my @record = (); # bufabsfer for fulltext of metadata
my %m = %{$$metadata{$absfile}};
my $metadataquery = qq(INSERT OR REPLACE INTO
old_metadata(recno, term, value)
VALUES(?, ?, ?));
my $sth1 = $dbh->prepare($metadataquery)
or die("prepare statement failed: $dbh->errstr()\n");
for my $term ( keys %m ) {
for my $values ( $m{$term} ) {
# exclude date-time stamps from fulltext, they are just numbers
if ($term !~ m/^dc\.date/) {
push(@record, @$values);
}
# save individual terms and values in db
for my $value (@$values) {
# individual terms and their values
$sth1->execute($recno, $term, $value)
or die("execute statement failed: $dbh->errstr()\n");
$sth1->finish;
}
}
}
# all the metadata for that one record for fulltext searching
$query = qq(INSERT OR REPLACE INTO
old_rawtext_metadata(recno, fulltext)
VALUES(?, ?));
my $sth2 = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$sth2->execute($recno, join(' ', @record))
or die("execute statement failed: $dbh->errstr()\n");
$sth2->finish;
$sth->finish;
}
$dbh->commit;
return(1);
}
sub write_bodies_to_database {
my ($dbh, $_bodies) = @_;
my %bodies = %{$_bodies};
my $query = q(SELECT recno FROM old_keys WHERE file = ?);
my $sth1 = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$query = q(INSERT OR REPLACE INTO
old_rawtext_body(recno, fulltext)
VALUES(?, ?));
my $sth2 = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
for my $absfile (sort keys %bodies) {
my ( $file ) = ( $absfile =~ m|$opt{'documentroot'}(.*)$| );
my $body;
$body = $$bodies{$absfile};
$sth1->execute($file)
or die("execute statement failed: $dbh->errstr()\n");
my $row = $sth1->fetchrow_hashref or next;
my $recno = $row->{'recno'} or next;
$sth1->finish;
$sth2->execute($recno, $body)
or die("execute statement failed: $dbh->errstr()\n");
$sth2->finish;
}
$dbh->commit;
return(1);
}
sub write_comments_to_database {
my ($dbh, $_comments) = @_;
my %comments = %{$_comments};
my $query = q(SELECT recno FROM old_keys WHERE file = ?);
my $sth1 = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
$query = q(INSERT OR REPLACE INTO
old_rawtext_comments(recno, fulltext)
VALUES(?, ?));
my $sth2 = $dbh->prepare($query)
or die("prepare statement failed: $dbh->errstr()\n");
for my $absfile (sort keys %{$comments}) {
my ( $file ) = ( $absfile =~ m|$opt{'documentroot'}(.*)$| );
my $comment = $$comments{$absfile};
$sth1->execute($file)
or die("execute statement failed: $dbh->errstr()\n");
my $row = $sth1->fetchrow_hashref or next;
my $recno = $row->{'recno'} or next;
$sth1->finish;
$sth2->execute($recno, $comment)
or die("execute statement failed: $dbh->errstr()\n");
$sth2->finish;
}
$dbh->commit;
return(1);
}
Generator/tr-rss-since-scraper.pl
#!/usr/bin/perl -T
# 2021-05-16
# XML RSS and Atom feed web scraper,
# feed it URLs for feeds plus a date-time stamp
# entries will be parsed and can saved in a file
# local times will be converted to UTC
use utf8;
use Getopt::Long;
use Time::ParseDate;
use Time::Piece;
use XML::Feed;
use URI;
use LWP::UserAgent;
use HTTP::Response::Encoding;
use HTML::TreeBuilder::XPath;
use HTML::Entities;
use English;
use strict;
use warnings;
our $VERBOSE = 0;
local $OUTPUT_AUTOFLUSH=1;
# work-arounds for 'wide character' error from wrong UTF8
binmode(STDIN, ":encoding(utf8)");
binmode(STDOUT, ":encoding(utf8)");
our %opt;
GetOptions('append|a' => \$opt{'a'},
'date|d=s' => \$opt{'d'},
'help|h' => \$opt{'h'},
'output|o=s' => \$opt{'o'},
'title|t' => \$opt{'t'},
'utc|u' => \$opt{'u'},
'verbose|v+' => \$opt{'u'},
'suppressli|L' => \$opt{'L'},
);
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$| );
if (defined($opt{'h'})) {
&usage($script);
}
if (defined($opt{'v'})) {
$VERBOSE = $opt{'v'};
}
my $output;
if (defined($opt{'o'})) {
# XXX needs proper sanity checking for path and filename at least
$output = $opt{'o'};
$output =~ s/[\0-\x1f]//g;
if ($output =~ /^([-\/\w\.]+)$/) {
$output = $1;
} else {
die("Bad path or file name: '$output'\n");
}
} else {
$output = '/dev/stdout';
}
my $utc = 0; # treat input as a local time and convert to UTC
if (defined($opt{'u'})) {
$utc = 1; # treat input as UTC without conversion
}
my $sdts;
if (defined($opt{'d'})) {
$sdts = parsedate($opt{'d'}, GMT=>$utc);
} else {
$sdts = parsedate('yesterday');
}
if ($VERBOSE) {
print STDERR qq(S=$sdts\n);
}
my $t = Time::Piece->strptime($sdts, '%s');
if ($VERBOSE) {
print STDERR qq(D=),$t->strftime("%a, %d %b %Y %H:%M:%S %Z"),qq(\n)
}
my $count = 0;
my $errors = 0;
while (my $url = shift) {
next if ($url =~ /^\s*#/); # skip comments
if ($VERBOSE) {
print STDERR qq(\nU=$url\n);
}
my $r = &get_feed($t,$url,$output);
if ($r) {
$count++;
} else {
$errors++;
print STDERR qq(Could not find feed at URL: "$url"\n);
}
}
if (not $count or $errors) {
&usage($script);
}
exit(0);
sub usage {
my ($script) = @_;
$script =~ s/^.*\///;
print <<EOH;
USAGE:
$script [-ahuvL] [-o file] [-d date/date-time] feed-url [feed-url...]
-a appends the file specified by -o instead of the default of
overwriting it.
-d is the date-time stamp before which feed entries published prior
to that will be ignored. Default is "yesterday" at the
current time. The format is yyyy-mm-dd or yyyy-mm-ddThh:mm
-o points to the file for collecting output, it is stdout by default.
-u treats start date as UTC, default is to use the local time zone.
-v show debugging output on stderr.
-L suppress use of <li> elements but leave the others.
-h shows this message.
Multiple feed URLs can be specified.
Queries and fragments are trimmed from the URIs.
Broken or malformed feeds will be skipped completely.
EXAMPLES:
$script -u -d 2019-08-01T00:00 http://example.com/ https://example.org/
$script -o /tmp/foo.html http://example.com/
$script -a -o /tmp/foo.html -d 2019-08-01 https://example.com/
The date for the -d option can be made using command substitution
and the date(1) utility.
$script -d \$(date -d '2 days ago' +'%Y-%m-%d') https://example.com/
KNOWN BUGS:
As a work-around for UTF-8 in Chromium and Firefox, meta elements
declaring UTF-8 explicitly are peppered through the output. The
placement cannot really be helped and the result is not valid XHTML
because these are in the wrong part of the document.
And it goes without saying that scraping sites is very brittle and
can stop working with even minor changes to the page structure.
EOH
exit(0);
}
sub get_feed {
my ($t,$url,$output) = @_;
my $uri = $url;
my $feed;
eval {
$feed = XML::Feed->parse(URI->new($uri));
};
if ($@) {
print STDERR $@,qq(\n);
print STDERR qq( Failed feed for '$uri'\n);
return(0);
} elsif (! defined($feed)) {
return(0);
}
my $feed_title;
eval {
$feed_title = $feed->title;
};
if ($@) {
print STDERR $@,qq(\n);
print STDERR qq( Failed title for '$uri'\n);
return(0);
}
my $feed_modified = encode_entities($feed->modified); # unsupported
my $feed_format = encode_entities($feed->format);
if ($VERBOSE) {
print STDERR qq(\tT=$feed_title\n);
print STDERR qq(\tF=$feed_format\n)
}
my @entries = ();
if ($feed->link =~ m|https?://cybershow.uk|) {
@entries = &read_feed_instead($t,$feed,$output);
} else {
@entries = &read_entries($t,$feed,$output);
}
if(@entries) {
my $mode;
if (defined($opt{'a'})) {
$mode = '>>';
} else {
$mode = '>';
}
open(my $out, $mode, $output)
or die("Could not open '$output' for appending: $!\n");
binmode($out, ":encoding(utf8)");
if (defined($opt{'t'})) {
print $out qq(<h3><a href="$url">$feed_title</a></h3>\n);
}
print $out join("", @entries);
close($out);
}
return(1);
}
sub read_entries {
my ($t,$feed,$output) = @_;
$t = parsedate($t);
my @entries = ();
my $count = 0;
foreach my $entry ($feed->entries) {
my $ft = $entry->{entry}{pubDate}
|| $entry->issued
|| $entry->modified;
# entry time in seconds
my $et = parsedate($ft) || 0;
next unless($et =~ /^\d+$/ && $et >= $t );
# these links are sometimes redirections from proxies
my ($base, $content) = &fetch_page($entry->link)
or die("Missing content from '",$entry->link,"'\n");
next if ($base eq -1 || $content eq -1);
next if ($base =~ /^\d+/ && $base<0);
if ($VERBOSE) {
print STDERR qq(Fetched:),substr($base,0,30),qq(\n)
}
my $uri = URI->new($base)
or die("Bad address, '$base', could not form URI\n");
$uri->query(undef);
$uri->fragment(undef);
my $site = $uri->authority;
# many sites are under feedburner
if ($site eq 'feeds.feedburner.com') {
if ($VERBOSE) {
print STDERR qq(A=Feed Burner\n);
}
if($uri->path =~ /^projectcensored/) {
$site = 'www.projectcensored.org';
} elsif($uri->path =~ /^johnpilger/) {
$site = 'johnpilger.com';
} elsif($uri->path =~ /^cubexyz.blogspot.com/) {
$site = 'cubexyz.blogspot.com';
} elsif($uri->path =~ /^LnuxTech-lb/) {
$site = 'linuxtechlab.com';
} elsif($uri->path =~ /^www.privateinternetaccess.com/) {
$site = 'www.privateinternetaccess.com';
} elsif($uri->path =~ /^original.antiwar.com/) {
$site = 'original.antiwar.com';
} elsif($uri->path =~ /^\~r\/MichaelGeistsBlog/) {
$site = 'www.michaelgeist.ca';
} elsif($uri->path =~ /^EliveLinuxWebsiteUpdates/) {
$site = 'www.elivecd.org';
} elsif($uri->path =~ /^www.tecmint.com/) {
$site = 'www.tecmint.com';
}
}
if ($VERBOSE) {
print STDERR qq(A=$site\n);
}
# remove spammy, paid-for press releases
if ($site eq 'www.commondreams.org') {
# LLL - todo
}
&scan_for_scripts($site, $content);
my $o = &choose_parser($site, $uri->canonical, $content);
if ($o) {
$count++;
push(@entries, $o);
} else {
# identify the feed which had the error
print STDERR qq(\t),$feed->title,qq(\n);
}
if ($VERBOSE) {
print STDERR qq(\t\t),$base,qq(\n);
}
}
if ($count) {
push(@entries, qq(\n<hr />\n\n));
}
return(@entries);
}
sub fetch_page {
my ($uri) = @_;
my $ua = LWP::UserAgent->new;
$ua->agent("NotRSS0day/0.1");
my $request = HTTP::Request->new(GET => $uri);
my $result = $ua->request($request);
if ($result->is_success) {
return($result->base, $result->decoded_content);
} else {
warn("Could not open '$uri' : ", $result->status_line, "\n");
return(-1,-1);
}
return(0,0);
}
sub scan_for_scripts {
my ($site, $content) = @_;
my $ent = HTML::TreeBuilder::XPath->new_from_content($content);
for my $t ($ent->findnodes('script')) {
print STDERR qq(script payload found in $site !\n);
exit(2);
}
$ent->delete;
return(1);
}
sub choose_parser {
my ($site, $url, $content) = @_;
my ($xpath_title, $xpath_description) = (0,0);
my ($title, $description) = (0,0);
if ($VERBOSE) {
print STDERR qq(S=$site\n);
}
my $ent = HTML::TreeBuilder::XPath->new_from_content($content);
if ($site eq '9to5linux.com') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.aclu.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div/div[@class="panel-pane pane-aclu-components-description description"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'anniemachon.ch') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'original.antiwar.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Antiwar.com Original//;
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'ar.al') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//body/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'archlinux.org') {
$xpath_title = '//h2[@itemprop="headline"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="article-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'blog.arduino.cc') {
$xpath_title = '//div[@class="post"]/h3[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'blog.benjojo.co.uk') {
$xpath_title = '//head/title';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//h1/following-sibling::p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.bunniestudios.com') {
$xpath_title = '//h2[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//h2/following-sibling::div[1]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'kb.cert.org') {
$xpath_title = '//div/div/div/div[@class="large-12 columns"]/h2';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//head/meta[@name="Description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.commondreams.org') {
return(0) if ($url =~/\/newswire\//);
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
# $xpath_description = '//head/meta[@name="description"]';
$xpath_description = '//div[3]/div[@class="body-description"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.counterpunch.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+-\s+CounterPunch.org//;
# $xpath_description = '//div[@class="story-header-area"]/p[1]';
$xpath_description = '//div[@class="story-header-area"]/p[position()<3 and not(contains(text(),"Subscribers content"))]';
$description = parse_description($ent, $xpath_description);
$description = 0 if($description =~ /We don't shake our/);
unless($description) {
$xpath_description = '//div[@class="post_content"]/p[position()<3 and not(contains(text(),"Subscribers content"))]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'couragefound.org') {
$xpath_title = '//html/head/meta[@name="twitter:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'cpj.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
# .col-sm-7 > article:nth-child(1) > p:nth-child(3)
$xpath_description = '//div[@class="col-sm-7"]/p[1]';
$description = parse_description($ent, $xpath_description);
$description =~ s/>[^>]*—/>/;
} elsif ($site eq 'climatenewsnetwork.net') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Climate News Network//;
$xpath_description = '//div[@class="entry-content-post"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.craigmurray.org.uk') {
$xpath_title = '//html/head/meta[@name="twitter:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//h1/following-sibling::p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'creativecommons.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Creative Commons//;
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
unless($description eq '<blockquote><p></p></blockquote>') {
$xpath_description = '//div[@class="entry-content"]/p[2]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'cubexyz.blogspot.com') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@id="mainClm"]/div[@class="blogPost"]';
$description = parse_description($ent, $xpath_description);
$description =~ s/\s+//;
# $description =~ s/\s\s+.*<\/blockquote>/<\/blockquote>/m;
} elsif ($site eq 'danielmiessler.com') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
# remove podcasts
return(0) if ($title =~ m/Unsupervised Learning: No\./);
$xpath_description = '//div[@class="entry-content"]/p[position()>=last()-1]';
$description = parse_description($ent, $xpath_description);
# remove adverts for social control media
# my $de = HTML::TreeBuilder::XPath->new_from_content($description);
# for my $p ($de->findnodes('//p')) {
# if($p->as_text =~ m/^Discuss on Tw/) {
# $p->delete;
# }
# }
# $description = $de->as_XML_compact;
# $de->delete();
$description =~ s/^.*(<blockquote>)/$1/;
$description =~ s/(<\/blockquote>).*$/$1/;
} elsif ($site eq 'dataswamp.org') {
$xpath_title = '//h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//h1/following-sibling::p[position()>1 and position()<4]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.democracynow.org') {
$xpath_title = '//h1[1]';
$title = parse_title($ent, $xpath_title);
return(0) if ($title =~ m/recent shows/i);
return(0) if ($title =~ m/^headlines/i);
$xpath_description = '(//div[@class="headline_body"]/div[@class="headline_summary"]/p[1])[1]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '(//div[@class="text"]/p[1])[1]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'www.digitalmusicnews.com') {
$xpath_title = '//html/head/title';
$title = parse_title($xpath_title, $content);
$title = failed_utf($title);
$xpath_description = '//div[@id="main"]//h2';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.desmog.com') {
$xpath_title = '//div[@class="elementor-widget-container"]/h1';
$title = parse_title($ent, $xpath_title);
# $xpath_description = '//div[@class="elementor-widget-container"]/div/p[position()<3]';
$xpath_description = '(//div[@class="elementor-widget-container"]/div/p)[position()<3]';
$description = parse_description($ent, $xpath_description);
# xxx work-around to eliminate site signature :(
$description =~ s/<p>Website by.*//ms;
} elsif ($site eq 'www.desmogblog.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="field-items"]/div[1]/p[2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'thedissenter.org') {
$xpath_title = '//h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="content"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'dontextraditeassange.com') {
$xpath_title = '//div[@class="entry-categories"]/following-sibling::h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()>1 and position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.dw.com') {
$xpath_title = '//div[1]/h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[1]/h1[1]/following-sibling::p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.elivecd.org') {
$xpath_title = '//h1[@class="post-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="post-content"]/h5[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.theenergymix.com') {
# lll
$xpath_title = '//h1[@class="jeg_post_title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="content-inner"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.eff.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
# work-around for something broken with p[1]
$xpath_description = '//div[@class="field__items"]/div[1]/p[position()>1 and position()<=4]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.exposedbycmd.org') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()<=2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'fair.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
# .entry-content > p:nth-child(4)
$xpath_description = '//div[@class="entry-content"]/p[2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'femtejuli.se') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//html/head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'ferd.ca') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//h2/following-sibling::p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'fortran-lang.org') {
$xpath_title = '//div[@class="newsletter col-wide"]/h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="newsletter col-wide"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'fossforce.com') {
$xpath_title = '//div//h1[@class="post-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="post-content"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.fossmint.com') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.france24.com') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="t-content t-content--article"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.gamingonlinux.com') {
$xpath_title = '//div/h1[@class="title p-name"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="content group e-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'godotengine.org') {
# lll
$xpath_title = '//div[@class="info"]/h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="info"]/following-sibling::p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'thegrayzone.com') {
$xpath_title = '//h1[@class="entry-title" and 1]';
unless($title) {
$xpath_title = '//h1[1]';
$title = parse_title($ent, $xpath_title);
}
$xpath_description = '//div[@class="entry-content"]/h3[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.greenparty.org.uk') {
# LLL fix this above with $et, does not currently get this far
$xpath_title = '//div[@class="threequarters"]/h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="threequarters"]/h1[1]/following-sibling::p[3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'hackaday.com') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
# $xpath_description = '//html/head/meta[@property="og:description"]';
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.hrw.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
# $xpath_description = '//html/head/meta[@property="og:description"]';
$xpath_description = '//div[@class="article-body article-body--contained"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'infojustice.org') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="post-content entry-content"]/p[position()>1 and position()<4]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'insighthungary.444.hu' or $site eq '444.hu') {
$xpath_title = '//div[@id="headline"]/h1';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.itwire.com') {
$xpath_title = '//h2[@class="itemTitle"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+Featured.*//; # should have been in XPath instead
$xpath_description = '//div[@class="itemIntroText"]/p';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'jacobinmag.com') {
$xpath_title = '//body/h1[@class="po-hr-cn__title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//h1/following-sibling::p[@class="po-hr-cn__dek"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'johnpilger.com') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title = &title_case($title);
$xpath_description = '//div[@class="text book last full" and position()=1]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'krebsonsecurity.com') {
$xpath_title = '//div/h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'kubernetes.io') {
$xpath_title = '//div[@class="content"]/h1';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="td-content"]/p[position()>1 and position() < 5 and not(preceding-sibling::h2)]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.laquadrature.net') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content entry-content-single"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.lightbluetouchpaper.org') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()<=2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.linuxandubuntu.com') {
$xpath_title = '//div/h1[@class="alignwide wp-block-post-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[contains(@class, "entry-content")]/p[position() < 5 and not(preceding-sibling::h2)]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.linuxbuzz.com') {
$xpath_title = '//div[@class="inside-article"]/h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.linuxcloudvps.com') {
$xpath_title = '//h2[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//p[position()>1 and position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'linuxhandbook.com') {
$xpath_title = '//div/h1[@class="hero__title text-center"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="content js-toc-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
# skip newsletters and such
if(!$description) {
return(0);
}
} elsif ($site eq 'www.linuxtechi.com') {
$xpath_title = '//div/h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="nv-content-wrap entry-content"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'linuxgizmos.com') {
$xpath_title = '//div[@class="post"]/h2';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entrytext"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'linuxtechlab.com') {
$xpath_title = '//h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="text"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'lunduke.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//html/head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'markcurtis.info') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Meduza//;
$xpath_description = '//div[@class="entry-content"]/p[position()>=3 and position()<=4]';
$description = parse_description($ent, $xpath_description);
unless($description) {
# some do not have the extra byline
# but it is hard to parse which do:
$xpath_description = '//div[@class="entry-content"]/p[position()>=2 and position()<=3]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'meduza.io') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Meduza//;
$xpath_description = '//div[@class="GeneralMaterial-article"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.michaelgeist.ca') {
$xpath_title = '//h1[@class="title"]';
$title = parse_title($ent, $xpath_title);
return(0) if($title=~/^The LawBytes Podcast/);
$xpath_description = '//div[@class="entry"]/p[last()]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'michaelwest.com.au') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Michael West.*//;
$xpath_description = '//div[@class="et_pb_title_container"]/p[@class="et_pb_title_meta_container"]';
$description = parse_description($ent, $xpath_description);
if ($description =~ m/\bAAP\b/) {
return(0);
}
$xpath_description = '//div[@id="old-post"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.michaelwest.com.au') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Michael West.*//;
$xpath_description = '//head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.mintpressnews.com') {
$xpath_title = '//head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'blog.mozilla.org') {
$xpath_title = '//div[1]/h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="ft-c-single-post__body"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.thenation.com') {
$xpath_title = '//div[@class="article-header-content"]/h1[@class="title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="article-body-inner"]/p[position()<3 and @class!="caption"]';
$description = parse_description($ent, $xpath_description);
$description =~ s/[\d\s]*Ad Policy.*$//i;
} elsif ($site eq 'newmatilda.com') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+New Matilda.*//;
$xpath_description = '//div/div[@class="post-content text-font description"]/p[2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'oceanservice.noaa.gov') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\W\s+Michael West.*//;
$xpath_description = '//head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'off-guardian.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//h6/following-sibling::p[@class="dropcap"]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '//div[@class="transcript"]/p[1]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'papersplease.org') {
$xpath_title = '//h1[@class="post-title entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()<4]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'news.opensuse.org') {
$xpath_title = '//h1[@class="decorated-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="col-md-7 col-12 mx-auto text-justify"]/p[position() <3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'opensource.com') {
$xpath_title = '//h1[@class="published page-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@id="article_content"]//div[@class="clearfix text-formatted field field--name-body field--type-text-with-summary field--label-hidden field__item"]/p[not(preceding-sibling::h2) and position() < 5]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'opensourcesecurity.io') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'ostechnix.com') {
$xpath_title = '//div/h1[@class="post-title single-post-title entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="inner-post-entry entry-content"]/div/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.pclinuxos.com') {
$xpath_title = '//div[@class="title"]/h2[1]';
$title = parse_title($ent, $xpath_title);
$title =~ s/^\s+//;
$xpath_description = '//div[@class="entry"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'perens.com') {
# header.entry-header h1.entry-title
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '//div[@class="entry-content"]/descendant::p[1]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'perlweeklychallenge.org') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="post-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.projectcensored.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="inner-post-entry entry-content"]/p[2]';
$description = parse_description($ent, $xpath_description);
if(!$description || $description =~ /Listen to all of our previous/) {
$xpath_description = '//div[@id="penci-post-entry-inner"]/div/div/div[1]';
$description = parse_description($ent, $xpath_description);
}
if(!$description || $description =~ /Listen to all of our previous/) {
$xpath_description = '//div[@id="penci-post-entry-inner"]/p[1]';
$description = parse_description($ent, $xpath_description);
}
if(!$description || $description =~ /Listen to all of our previous/) {
$xpath_description = '//div[@id="penci-post-entry-inner"]/div/div[1]';
$description = parse_description($ent, $xpath_description);
}
if(!$description || $description =~ /Listen to all of our previous/) {
$xpath_description = '//div[@id="penci-post-entry-inner"]/div[1]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'pluralistic.net') {
1;
# placeholder
} elsif ($site eq 'www.privateinternetaccess.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="detail-ct"]/p[2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'projects.propublica.org') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s*\|.*$//;
$xpath_description = '//html/head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'features.propublica.org') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//html/head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.propublica.org') {
# $xpath_title = '//html/head/meta[@name="dcterms.Title"]';
$xpath_title = '//html/head/meta[@property="headline"]';
$title = parse_title($ent, $xpath_title);
unless($title) {
$xpath_title = '//h2[@class="hed"]';
$title = parse_title($ent, $xpath_title);
}
$xpath_description = '//div[@class="article-body"]/p[position()<=2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.openrightsgroup.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="sixteen columns"]/*/p[1]';
$description = parse_description($ent, $xpath_description);
unless ($description) {
$xpath_description = '//div[@class="sixteen columns"]/p[1]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'puri.sm') {
$xpath_title = '//div[@class="container"]/h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="blog-entry e-content"]/p[not(preceding-sibling::h1) and position() < 4]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.qt.io') {
$xpath_title = '//div[@class="h-wysiwyg-html/h1"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//span[@id="hs_cos_wrapper_post_body"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'rakudoweekly.blog') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '(//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.raspberrypi.org') {
$xpath_title = '//h1[2]';
$title = parse_title($ent, $xpath_title);
unless ($title) {
$xpath_title = '//h1[1]';
$title = parse_title($ent, $xpath_title);
}
$xpath_description = '//html/head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '//div[contains(@class,"c-post-content__wysiwyg")]/p[1]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'www.redhat.com') {
$xpath_title = '//div[@class="rh-article-teaser--component"]/h1';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[starts-with(@class,"rh-generic")]//p[not(preceding-sibling::h3) and position() < 3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'respectfulinsolence.com'
|| $site eq 'www.respectfulinsolence.com') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '(//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'therevelator.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+.bull\;.*//;
$title =~ s/\s+•.*//;
$xpath_description = '(//div[@id="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.rferl.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '(//div[@id="article-content"]/div[1]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'robertreich.org') {
$xpath_title = '//div[@class="caption"]/h2/b';
$title = parse_title($ent, $xpath_title);
if (!$title) {
$xpath_title = '//li[@class="post"]/a/h2';
$title = parse_title($ent, $xpath_title);
$xpath_description = '(//div[@class="caption"])/p[2]';
$description = parse_description($ent, $xpath_description);
} else {
$xpath_description = '(//div[@class="caption"])[last()]/p[last()]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'robert.ocallahan.org') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '(//div[@class="post-body entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.rosehosting.com') {
$xpath_title = '//div/h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '(//div[@class="entry-content"]/p[not(preceding-sibling::h3) and position() < 3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'shadowproof.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
# div.vw-post-content.clearfix p
$xpath_description = '//div[@class="vw-post-content clearfix"]/p[position()<=2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'scheerpost.com') {
$xpath_title = '//h1[contains(@class,"entry-title")]';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+-\s+CounterPunch.org//;
$xpath_description = '//head/meta[@property="og:description"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.spiegel.de') {
$xpath_title = '//h2[@class="article-title lp-article-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div/h2/following-sibling::p[@class="article-intro"]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'digit.site36.net') {
$xpath_title = '//h3[@class="wp-block-post-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="has-global-padding is-layout-constrained entry-content cat-links entry-meta tag-links entry-content edit-link page-links wp-block-post-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'blog.steve.fi') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()>=last()-1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'techcrunch.com') {
$xpath_title = '//html/head/meta[@name="sailthru.title"]';
$title = parse_title($ent, $xpath_title);
$title = failed_utf($title);
$xpath_description = '//html/head/meta[@name="sailthru.description"]';
$description = parse_description($ent, $xpath_description);
$description = failed_utf($description);
$url =~ s/\?[^\?]*$//;
} elsif ($site eq 'www.techdirt.com') {
$xpath_title = '//h1[@class="posttitle"]';
$title = parse_title($ent, $xpath_title);
# remove Daily Deals
return (0) if ($title =~ m/^Daily Deal/);
# remove Funniest
return (0) if ($title =~ m/^Funniest/i);
# skip recaps
return(0) if ($title =~ m/^This Week In Techdirt History/i);
$xpath_description = '//div[@class="byline"]/following-sibling::p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.tecmint.com') {
$xpath_title = '//h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
# lll
} elsif ($site eq 'www.technologyreview.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[1]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.tedunangst.com') {
# http://www.tedunangst.com/flak/rss
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="byline"]/following-sibling::p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'threatpost.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="c-article__intro"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'telex.hu') {
$xpath_title = '//div[1]/div[1]/h1';
$title = parse_title($ent, $xpath_title);
# $xpath_description = '//div[@class="top-section"]/following-sibling::p[1]';
$xpath_description = '//div[@class="article-html-content"]/div/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'blog.torproject.org') {
$xpath_title = '//h1[@class="title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="body"]/p[position()<3]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '(//p)[2]';
$description = parse_description($ent, $xpath_description);
}
} elsif ($site eq 'torrentfreak.com') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s[\-\*]\sTorrentFreak$//;
return (0) if ($title =~ /Most Torrented Movie of The Week/i);
# '//div[@class="entry-summary"]/p[@class="entry-lead"]'
$xpath_description = '//p[@class="article__excerpt"]';
$description = parse_description($ent, $xpath_description);
$url =~ s/\?.*$//;
} elsif ($site eq 'blog.trailofbits.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.truthdig.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
unless($title) {
$xpath_title = '//html/head/meta[@name="twitter:title"]';
$title = parse_title($ent, $xpath_title);
}
$xpath_description = '//div[@class="article-item__content am2-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'truthout.org') {
$xpath_title = '//h1[1]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@id="article-content"]/p[1]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '//p[@data-pp-id="1.0"]';
$description = parse_description($ent, $xpath_description);
}
# LLL - truthout's XHTML has multiple fatal validation errors
# cannot be processed, yet
} elsif ($site eq 'ubuntu.com') {
$xpath_title = '//html/head/title';
$title = parse_title($ent, $xpath_title);
$title =~ s/\s+\|.*$//;
$xpath_description = '//div[@class="p-post__content"]//p[not(preceding-sibling::h2) and position() < 3]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.ubuntubuzz.com') {
$xpath_title = '//div[@class="title"]/h1';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry"]/p[2]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.unixmen.com') {
$xpath_title = '//div/h1[@class="entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="td-post-content"]//p[not(preceding-sibling::h2) and position() < 4]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '//div[@class="td-post-content"]/p[position()>2 and position()<5]';
$description = parse_description($xpath_description, $content);
}
} elsif ($site eq 'vitux.com') {
$xpath_title = '//div[@class="post-title-wrapper"]/h1';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="entry-content clearfix"]/p[not(preceding-sibling::h2) and position() < 3]';
$description = parse_description($ent, $xpath_description);
unless($description) {
$xpath_description = '//div[@class="entry-content clearfix"]/p[1]';
$description = parse_description($xpath_description, $content);
}
unless($description) {
$xpath_description = '//div[@class="entry-content clearfix"]/p[2]';
$description = parse_description($xpath_description, $content);
}
} elsif ($site eq 'yottadb.com') {
$xpath_title = '//html/head/meta[@property="og:title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div/div[@class="col-sm-20" and position()=3]/p[1]';
$description = parse_description($ent, $xpath_description);
} elsif ($site eq 'www.zenwalk.org') {
$xpath_title = '//h3[@class="post-title entry-title"]';
$title = parse_title($ent, $xpath_title);
$xpath_description = '//div[@class="post-body entry-content"]';
$description = parse_description($ent, $xpath_description);
} else {
# the site does not yet have XPaths, return with an error
print STDERR qq(Site "$site" is not yet configured,);
print STDERR qq(\tSee "$url"\n);
$ent->delete;
return(0);
}
# LLL - should print warning if no title or description is found
if ( $description !~ /<p>/ ) {
$description = "<blockquote><p>$description</p></blockquote>";
}
$ent->delete;
return( &print_item($title, $url, $description) );
}
sub parse_title {
my ($ent, $xpath_title) = @_;
my $title = 0;
for my $t ($ent->findnodes($xpath_title)) {
if($t->tag eq 'meta') {
$title = $t->attr('content') || 0;
} else {
$title = $t->as_text || 0;
}
}
$title =~ s/\s+$//m;
$title =~ s/^\s+//mg;
$title = encode_entities($title);
return($title);
}
sub parse_description {
my ($ent, $xpath_description) = @_;
my $description = '';
for my $d ($ent->findnodes($xpath_description)) {
if($d->tag eq 'meta') {
my $desc = encode_entities($d->attr('content'));
$description .= '<p>'.$desc."</p>\n" || 0;
} elsif($d->tag eq 'p') {
if($d->as_trimmed_text) {
my $desc = encode_entities($d->as_trimmed_text);
$description .= '<p>'.$desc."</p>\n";
}
} else {
$description = encode_entities($d->as_trimmed_text);
$description .= $description.qq(\n);
}
}
if ($description) {
$description =~ s/>\s+/>/gm;
$description = qq(<blockquote>$description</blockquote>\n);
}
# delete hidden soft-hyphen and zero-width space trackers
$description =~ s/[\x{00AD}\x{200B}]//g;
return($description);
}
sub failed_utf {
my ($text) = @_;
# crude work-arounds for failed utf-8 / unicode
# $text =~ s/â/'/g;
$text =~ s/\x{2060}//g;
return($text);
}
sub print_item {
my ($title, $url, $description) = @_;
my $output;
if(!defined($opt{'L'})) {
$output .= qq(<li>);
}
$output .= qq(<h5><a href="$url">$title</a></h5>\n);
if($description) {
$output .= qq($description);
} else {
$output .= qq(<blockquote>\n</blockquote>\n);
}
if(!defined($opt{'L'})) {
$output .= qq(</li>\n\n);
}
return($output);
}
sub title_case {
my ($title) = @_;
# based on Chapter 1.14.2, Perl Cookbook, 2nd ed.
our %nocap;
unless(keys %nocap) {
foreach my $w (qw(a an the and but or as at but by for
from in into of off on onto per to with)) {
$nocap{$w}++;
}
}
# put into lowercase if on stop list, else titlecase
$title =~ s/(\pL[\pL']*)/$nocap{$1} ? lc($1) : ucfirst(lc($1))/ge;
# last word guaranteed to cap
$title =~ s/^(\pL[\pL']*) /\u\L$1/x;
# first word guaranteed to cap
$title =~ s/ (\pL[\pL']*)$/\u\L$1/x;
# treat parenthesized portion as a complete title
$title =~ s/\( (\pL[\pL']*) /(\u\L$1/x;
$title =~ s/(\pL[\pL']*) \) /\u\L$1)/x;
# capitalize first word following colon or semi-colon
$title =~ s/ ( [:;] \s+ ) (\pL[\pL']* ) /$1\u\L$2/x;
return ($title);
}
sub read_feed_instead {
my ($t,$feed,$output) = @_;
# use feed metadata instead of parsing fetched articles
$t = parsedate($t);
my @entries = ();
my $count = 0;
foreach my $entry ($feed->entries) {
my $ft = $entry->{entry}{pubDate}
|| $entry->issued
|| $entry->modified;
# entry time in seconds
my $et = parsedate($ft) || 0;
next unless($et =~ /^\d+$/ && $et >= $t );
my $title = $entry->title || 0;
my $url = $entry->link || 0;
my $description = $entry->{entry}{description} || 0;
if ($description) {
$description = "<p>". $description. "</p>";
}
my $o = &print_item($title, $url, $description);
push(@entries, $o);
}
if ($count) {
push(@entries, qq(\n<hr />\n\n));
}
return(@entries);
}
Generator/tr-old-extract-wiki.pl
#!/usr/bin/perl
# read wiki database directly via SQL
# and produce HTML
use Getopt::Long;
use Config::Tiny;
use Data::Dumper;
use DBI;
use File::Path qw(make_path);
use Encode;
use URI::Escape qw(uri_escape);
use open qw(:std :encoding(UTF-8));
use strict;
use warnings;
our %opt = (
'configfile' => '',
'verbose' => 0,
'help' => 0,
);
GetOptions (
"configfile|c" => \$opt{'configfile'}, # string
"verbose|v+" => \$opt{'verbose'}, # flag, multiple settings
"help|h" => \$opt{'help'}, # flag
);
my $configfile = $opt{configfile} || $ENV{HOME}.'/bin/tr-old-extract-wiki.config';
if (! -f $configfile) {
die;
}
if (! -r $configfile) {
die;
}
my $config = Config::Tiny->read($configfile);
my $database = $config->{database}->{database};
my $dbuser = $config->{database}->{username};
my $dbpasswd = $config->{database}->{password};
my $documentroot = $config->{webserver}->{documentroot};
my $wiki = $config->{webserver}->{subdirectory};
my $targetdir = $documentroot.$wiki;
if (! -e $targetdir) {
make_path($targetdir,{mode=>0775})
or die("Could not create path '$targetdir' : $!\n");
}
if ($opt{verbose}) {
print qq($documentroot, $wiki\n);
}
# connect to MySQL database
my $dsn = 'DBI:mysql:'.$database;
my %attr = ( PrintError=>0, # turn off error reporting via warn()
RaiseError=>1,
mysql_enable_utf8=>1,
); # turn on error reporting via die()
my $dbh = DBI->connect($dsn,$dbuser,$dbpasswd, \%attr);
$dbh->do('set names "UTF8"');
my $query = q(
SELECT text.old_id, page.page_title, text.old_text from page
LEFT JOIN revision on revision.rev_id=page.page_latest
LEFT JOIN text on text.old_id = revision.rev_text_id
);
my $sth = $dbh->prepare($query);
$sth->execute;
my %spam = &spam_list();
my %prev = ();
my %next = ();
my ($oldi, $newi, $midi) = () x 3;
my ($oldt, $newt, $midt) = () x 3;
while(my $row = $sth->fetchrow_hashref) {
$newi = decode('UTF-8', $row->{old_id});
$newt = decode('UTF-8', $row->{page_title});
if ($spam{$newt}) {
next;
}
if ( $newt =~ m/\.jpeg$/i
|| $newt =~ m/\.jpg$/i
|| $newt =~ m/\.png$/i
|| $newt =~ m/\.svg$/i
|| $newt =~ m/\.gif/i ) {
next;
}
if ($midi) {
$next{$midi}->{title} = $newt;
$next{$midi}->{oldid} = $newi;
if ($oldi) {
$prev{$midi}->{title} = $oldt;
$prev{$midi}->{oldid} = $oldi;
}
}
$oldi = $midi;
$oldt = $midt;
$midi = $newi;
$midt = $newt;
}
if ($midi) {
$next{$midi}->{title} = $newt;
$next{$midi}->{oldid} = $newi;
}
if ($oldi) {
$prev{$midi}->{title} = $oldt;
$prev{$midi}->{oldid} = $oldi;
}
my %category = ();
$sth->execute;
# old_id, old_text, page_title
while(my $row = $sth->fetchrow_hashref) {
my $old_id = $row->{old_id};
my $old_text = $row->{old_text};
my $page_title = $row->{page_title};
if ($spam{$page_title}) {
next;
}
if (! $old_id) {
next;
}
if ( $page_title =~ m/\.jpeg$/i
|| $page_title =~ m/\.jpg$/i
|| $page_title =~ m/\.png$/i
|| $page_title =~ m/\.svg$/i
|| $page_title =~ m/\.gif/i ) {
next;
}
$page_title =~ s/\|+/_/gm;
$old_text = decode('UTF-8', $old_text);
$page_title = decode('UTF-8', $page_title);
my $page = $targetdir.'/'.$page_title;
if (! -e $page) {
make_path($page,{mode=>0775})
or die("Could not create page path '$page' : $!\n");
}
if (! -d $page) {
die("Not a subdirectory: '$page_title'\n");
}
# not good work-around
next if ( -f $page.'/index.shtml');
open(my $pg, '>', $page.'/index.shtml')
or die("Could not wopen '$page' for writing: $!\n");
my ($p, $n) = () x2;
if ( exists( $prev{$old_id} )) {
$p = $prev{$old_id}->{title}
}
if ( exists( $next{$old_id} )) {
$n = $next{$old_id}->{title};
}
print $pg &make_html($old_id, $page_title, $old_text, \%category,
$p, $n);
close($pg);
# print $old_id,"\t",$page_title,"\n";
}
$sth->finish;
$dbh->disconnect;
foreach my $c (sort keys %category) {
my $dir = $documentroot.$wiki.'/Category/'.$c;
$dir =~ tr/ /_/;
if (! -e $dir) {
make_path($dir,{mode=>0775})
or die("Could not create page path '$dir' : $!\n");
}
open(my $cat, '>', $dir.'/index.shtml')
or die;
print $cat &make_cat($c, @{$category{$c}});
close($cat);
# print $c, ' : ', join(', ', @{$category{$c}}), "\n";
}
exit(0);
sub make_html {
my ($old_id, $page_title, $old_text, $category, $prev, $next) = @_;
# lll
if (! $old_text) {
return("<!-- $old_id -->") ;
}
$page_title =~ tr/_/ /;
$old_text = &markdown_to_html($old_text, $page_title, \$category);
my $p = $prev;
my $n = $next;
my $nav = '';
if ($prev && $next) {
$p =~ tr/ /_/;
$n =~ tr/ /_/;
$nav = qq(<a href="/wiki/$p">$prev</a> | <a href="/wiki/$n">$next</a>);
} elsif ($prev) {
$p =~ tr/ /_/;
$nav = qq(<a href="/wiki/$p">$prev</a> | <a name="next">next</a>);
} elsif ($next) {
$n =~ tr/ /_/;
$nav = qq(<a name="prev">prev</a> | <a href="/wiki/$n">$next</a>);
}
my $html = <<EOHTML;
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<!-- $old_id -->
<title>$page_title</title>
<link rel="stylesheet" href="/CSS/techrights-wiki.css"
media="screen" type="text/css" />
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/feeds.html"-->
<div class="post">
<!--#include virtual="/navigation.html"-->
<div class="nav">
$nav
</div>
<h1>$page_title</h1>
<div class="wiki">
$old_text
</div>
</div>
<!--#include virtual="/footer.html"-->
</body>
</html>
EOHTML
return($html);
}
sub markdown_to_html {
my ($old_text, $page_title, $category) = @_;
if (! $old_text) {
return($old_text);
}
while ( $old_text =~ m/\[\[Category:\s*(.*)\]\]/m ) {
push(@{$category{$1}}, $page_title);
$old_text =~ s{\[\[Category:\s*(.*)\]\]}
{ my $c=$1; my $d=$c; $c=~tr/ /_/;
sprintf("<a href=\"/wiki/Category/%s\">Category:%s</a>", $c, $d)}emx;
}
# tables :/
if ( $old_text =~ m|\{\x{007c}([^\}]+)\x{007c}\}|m ) {
my $t = $1;
my $class='';
if ( $t =~ s|\s*class\s*=\s*"([^"]+)"|| ) {
$class = qq(class="$1" );
}
my $border='';
if ( $t =~ s|\s*border\s*=\s*"([^"]+)"|| ) {
$border = qq(border="$1");
}
# $t =~ s|<|\<|gm;
# $t =~ s|>|\>|gm;
$t =~ s{(\|-[^\n]*\n)?^\|} {<tr><td>}gm;
while ( $t =~ s{(?=<tr>)(.*?)\|\|} {$1 <td>} ) {
1;
}
$t =~ s{(\|-[^\n]*\n)?^\!} {<tr><th>}gm;
while ( $t =~ s{(?=<tr>)([^\!]+)\!{1,2}} {$1 <th>} ) {
1;
}
$old_text =~ s{\{\x{007c}([^\}]+)\x{007c}\}}
{<table $class $border>$t</table>}m;
}
$old_text =~ s|^={5}([^=]+)={5}|<h5>$1</h5>|gm; # h5
$old_text =~ s|^={4}([^=]+)={4}|<h4>$1</h4>|gm; # h4
$old_text =~ s|^={3}([^=]+)={3}|<h3>$1</h3>|gm; # h3
$old_text =~ s|^={2}([^=]+)={2}|<h2>$1</h2>|gm; # h2
$old_text =~ s|^={1}([^=]+)={1}|<h1>$1</h1>|gm; # h1
$old_text =~ s|^\*(.*)|<li>$1</li>|gm; # item list
$old_text =~ s|'{3}([^']+)'{3}|<b>$1</b>|gm; # bold
$old_text =~ s|'{2}([^']+)'{2}|<i>$1</i>|gm; # italics
$old_text =~ s|\n\s*\n|<br />\n<br />\n|gm; # line breaks
$old_text =~ s|(</h[0-6]>)<br />\n<br />\n|$1\n\n|gm; # remove extra breaks
# [[Image:Standard Life Logo.svg.png|frame|Standard Life stonewalled customers for months if not ''years'']]
# images
while ($old_text =~ m|\[\[Image:([^\]\|]+)[^\]]*\]\]|m) {
# hack for some wiki image links containing spaces in the names
my $oldimage = $1;
my $newimage = $oldimage;
$newimage =~ s/\W+$//mu;
$newimage =~ s| |_|gmu;
$old_text =~ s{\[\[Image:[^\]\|]+[^\]]*\]\]}
{<img src="/wiki/$newimage" />}mx;
}
# internal links
$old_text =~ s{\[\[([^\]]+)\]\]}
{ my $c=$1; my $d=$c; $c=~tr/ /_/;
sprintf("<a href=\"/wiki/%s\">%s</a>", $c, $d)}egmx;
# external links
$old_text =~ s{(?<!\[)\[([\w]+://*[^\s]+)\s+([^\]]+)\](?!\])}
{<a href="$1">$2</a>}gmx;
# make relative links
$old_text =~ s|([[:punct:]])https?://techrights.org/|"/o/|gm;
$old_text =~ s|([[:punct:]])https?://boycottnovell.com/|$1/o/|gm;
# update domain
$old_text =~ s|https?://boycottnovell.com/|https://techrights.org/o/|gm;
# make hyperlinks
$old_text =~ s{(?<!href=")(https?://\S*?)(?=\s|<|$)}
{<a href="$1">$1</a>}gmx;
return($old_text);
}
sub make_cat {
my ($c, @links) = @_;
my $l = '';
foreach my $ll (@links) {
my $href = $ll;
my $anchor = $ll;
$anchor =~ tr/_/ /;
$href =~ tr/ /_/;
$l .= qq(<li><a href="/wiki/$href">$anchor</a></li>\n);
}
my $html = <<EOHTML;
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>$c</title>
<link rel="stylesheet" href="/CSS/techrights-wiki.css"
media="screen" type="text/css" />
</head>
<body>
<h1>$c</h1>
<div class="wiki">
<ul>
$l
</ul>
</div>
</body>
</html>
EOHTML
return($html);
}
sub spam_list {
my @spam = qw(
Durchducvichaf
Kidsrapade
Chyslofire
Milsiocaubund
Srebovreterp
Privpiboduc
Negodida
Wahmnithundi
Turncusdevers
Gnathythfilfue
Pianutlesi
Quififorpi
Monsysilma
Tentscorimhyd
Tohumicri
Dedemizazz
Verbrockgodee
Trapsaudisbe
Estiocolo
Kinhundmotog
Pelgconlodi
Inindecof
Starelinge
Curombewoo
Rolcichare
Ltimulecan
Folhasite
Natiremen
Humnecasta
NitaDunkel
Maynamymab
Ningtraccomney
Imtehamed
Walfiltlosmie
Ertiworlnen
Truscagudam
Chaufuncmonra
Nantonoti
Smaresaplon
Urersele
Songhasjustpup
Zuodiminmei
Diastamizar
Fernbagasgu
Rahomoli
Leuhsonemful
Grisamriti
Chondthinsbeachsu
Detfofuri
Tatriesecon
Emanesxy
Nelfihobdi
Prepunreter
Fuecoditi
Roiminlytim
Ilbutepho
Haipawacyc
Tycarothszo
Bovolpyde
Diarabrelo
Pescozopo
Smactocterun
Gofftinrare
Gemsdecdogppa
Mabandlefbe
Prosansite
Rastfitoja
Gratfenounpu
Anmaphsuenuou
Mayproganin
Tirantdisfver
Orplanovreg
Nuicofuhos
Etimiban
Counopytneu
Atplanenir
Cumsmomudde
Rlassusatern
Tranrigambred
Gonlafanna
Verdispmimus
Pisupptowa
Tranockisse
Lingvertemac
Camphidermarb
Stylunothob
Stutexivin
Tirrvappadog
Verbertmentla
Lafelearli
Exettelbi
Vigenringwor
Biomosepart
Wipuncbehin
Hophikinvie
Cancetedust
Duffhillmispsa
Raucotipab
Greasusawen
Parlilutu
Montlewacent
Veswinfmagsett
Guiterdisi
Teovikebext
Tobiwekmo
Imgantiagarg
Nearaffcholar
Paimilexy
Hambmysqrinnens
Exexswitham
Dmakexulget
Laidileglia
Mauskighatsi
Solmonasul
Tacafunpho
Rackmapenle
Blincamfisa
Diatadicti
Whisthansibe
Maconninewp
Cendlinguhig
Chrispacharro
Distranchestterf
Alsecela
Kingcocugua
Zardwhatisma
Carpgnitfoting
Tatakapor
Contotuti
Pensprestiodu
Dontbuzztettuo
Capocselfnog
Mestlipami
Leonabsrabsits
Panowalfall
Sweepinbisu
Blephanamov
Difftahornsan
Mehotchkorming
Tranmenditext
Chronisinvi
Dullduahealde
Naysuckaupream
Sferpacardi
Roenibdeckket
Capmumenti
Taudiconsra
Niatecrespbirth
Portsuplare
Taiculmyti
Roundperjousa
Trancescopar
Clasororom
Rentreroovi
Larelecom
Clutombenus
Propinliedus
Gerrebeahum
Fortopccornlan
Cardnewrepost
Aloccila
Bopsiechartper
Veretrawolf
Cherracalu
Songnuanreiplic
Denisercest
Tybesroko
Dispthylpape
Basitadi
Tradopinsiou
Devespybou
Ningmedepub
Hoslienuti
Saubifortbi
Litelazbank
Perphalutag
Proglikeca
Recmortcontnot
Fenconsturil
Depkingpeemfest
Stabdazzpasseo
Enzvezpari
Keirirore
Lenfimalgzu
Gaybenzbeachfso
Necparakis
Sullinibo
Bestmanispio
Vaabrothvulki
Psychgochoba
Blacalcleanen
Rebriysihy
Spotarbirans
Podedrodis
Otarelve
Tersranbaness
Quefogeme
Haupufffchenon
Desdownterrea
Probeararli
Toefolgkomgest
Akofsuflo
Hersbennneri
Worrestsese
Alerhello
Mufulrintba
Colytormyo
Suitchochoscie
Neulervete
Freesesasot
Lcosesbolink
Xantticwcalvi
Terviapenta
Tielendade
Preachmeciset
Liomustawil
Loccsumxyablas
Lalivape
Geibobssety
Stylbifinvi
Nazeahiden
Enchedesimp
Doorppobubbdryc
Gepifonto
Ritheatsearchcor
Riaspecvamon
Afonmeri
Eracsaypsych
Enetacac
Readdcalviapos
Thankmarsuho
Serqaralo
Errhincymea
Nistdaphdisi
Comtoparmens
Efealdersynch
Perlealiti
Hotlifullse
Lutssobneworl
Realgemttangduf
Malemevas
Barsodacomp
Monpayliner
Lourssisrede
Achalperfi
Teyroledmi
Luweegoore
Ceinsecabem
Scorpostraldext
Starenpeli
Gleninjetest
Stocaseruc
Filmnewsllumter
Sioharreduc
Utebderno
Voypotursanc
Cotelurdy
Himasktownto
Imcrinexrea
Cayclothicra
Niesusore
Percomonews
Domisgeti
Bronthernrati
Joboremi
Arvigousde
Comgiegere
Fodamritster
Ferpotema
Pennmipinkfatt
Findprovaren
Madegonen
Sighdergprepber
Weckslotinte
Liohacorpo
Pauvolrana
Vedosuper
Mayclearesef
Ourcesridist
Tiosacalow
Hotomeju
Thiemogrioulym
Lesskompkarea
Contchistbirthtool
Glamperlheartchan
Gasboposse
Lioviawiker
Chooohoumuqma
Tetabviba
Emivthose
Circstephelem
Poonstelquiner
Gaimicfundbef
Ndureastelning
Credenaccom
Ncidolinran
Hufoptopost
Tercolinkmo
Hanyswheera
Hensbanmersjar
Prochcalofor
Waphonacchai
Footgarono
Scortingluthe
Ciovestblesber
Tumbragalnald
Tactcisitur
Pricerateth
Iczooptompto
Befeciva
Ilbelolas
Usitsomi
Gierecpeta
Machmilonwinf
Reeticargea
Heihamsgecsu
Tmatisrecback
Tellprototcoc
Nieprovintu
Lipantapen
Lingrihfastro
Sapptradronria
Kunparesria
Tribabsokin
Witchcolife
Confgeenele
Tabvivebi
Cabesere
Trevsotorne
Gelcosptogco
Elleaperge
Perpiewinddi
Tiastatnewsti
Ditcrowetou
Dabourtewa
Speceatchizuc
Pairavimer
Orimadoun
Kicksuarealre
Kirsrititi
Amlatuncomp
Mitselamu
Taitomantwha
Cewvilumro
Smilloverfers
Creatcoborrtent
Mycipeddstur
Ficonleso
Losihardport
Concfituano
Diabaltilog
Maviwabu
Subsvestlighbobs
Buimimilde
Diytaliga
Esibhipguard
Acananav
Dustrahoupec
Blascoormewoods
Protcocklunko
Basrisuspa
Temdabarbeau
Tijungcarsro
Itagnaba
Lokephasa
Boypentjufe
Rograustephex
Khokenanim
Afobbooksi
Deadrotosyp
Tumilzeire
Painacumtu
Ermoecappga
Unvafervi
Diosabbackpi
Stangimdila
Liamiginor
Olylemci
Pentmukrate
Giespechuddfol
Broommetherem
Zestbenewme
Aticemte
Philbmihubbtar
Fluxifcode
Tiostalonmig
Nietitasme
Inoversed
Tratmitoles
Asandoge
Contlireaten
Rolbeseti
Compfisanna
Kunareci
Coeramsaham
Swaterineer
Riracerse
Riachiatuga
Lianespoiga
Condicostswar
Adprovworlread
Podharddedgue
Mortpunccornpigs
Verbcentgili
Chromlatati
Stepmonlicat
Pherporato
Lantmingchignuzz
Veylibome
Stabdissnuhol
Araminis
Schelefamol
Coatistaihan
Comningplusrei
Roimidmavab
Tiolesigtia
Incallauleth
Songhostremray
Pulgeremix
Atertice
Gnosarpleassa
Imutafro
Grosenemlei
Drexharmcharsing
Bacmilesmort
Florgauchanli
Exundengigf
Exurudev
Prodabinta
Defdiketne
Gycetute
Gregemloma
Chennaucrandin
Tivabanma
Deefinmiscma
Ineninag
Amoselcon
Oretmalu
Fresunagka
Terpliretorc
Placununta
Exsehychan
Planitkoko
Ningpamassans
Sacberklati
Miabuntavi
Matnemeadsa
Blematzemos
Embiaquive
Denzizagorr
Crinvenpiehe
Vipemobeds
Kindrodofchurch
Waggbaberoo
Oronplorit
Kotinode
Grumrilbilux
Urorlabang
Anmacompphras
Naigentpersbe
Stepjingmeltscher
Nexsballmackwild
Unesnonnats
Illibima
Prevwinmering
Subsmonhandtas
Juegfasebzo
Roanvolkiachal
Niaraccata
Roawacoge
Bioreesyncha
Rieclimoubun
Exbecteli
Skivcounquemen
Fissabextcur
Relrabartmul
Kmobterfirsfic
Britabtradwer
Childdealhillbouk
Weldiagielo
Breakwopaca
Lasolvenal
Templalicu
Gasisoket
Outbeospirbo
Prinenened
Karirava
Mersgoomusen
Diofacompto
Tiobotile
Watriasacyc
Kelputithe
Tinckaleepe
Kufftaststeambang
Lenalode
Leftcaloka
Granelpervers
Vilchelisda
Compfindribe
Feeesalefoo
Keohighgyci
Theococoneg
Unobtothes
Enoglipo
Naitromhasa
Diemonensli
Delrainefonc
Buikephopa
Zyhoubarria
Putporttyla
Sunbextharal
Peyfillingre
Pelmilihi
Leiradalrea
Prepvyubeto
Cribsioronde
Catchdacusfi
Nenspocoling
Baileclikah
Erplutrasa
Conzapstercha
Cosglawnbaka
Gobbmathesuc
Bhavinbelkee
Tertnenruico
Preparadda
Findlinglenli
Togreborgde
Thumbtribimma
Dumtactnare
Briccuderla
Skeltifursti
Slabimabre
Forwibutpadd
Raipybasvie
Laispecgarbdaw
Piosicontmer
Mazibita
Statarathol
Chonddisrelet
Ovtranverpnfop
Warlirobi
Asmadipart
Cognesstasto
Thromimmocto
Barctwettunggend
Starodcheekdist
Padatiba
Dramininam
Pomisahou
Nterteslevan
Litpfasttime
Moccardrosif
Apidsursu
Sicomplefo
Mountiveneg
Ocenajmi
Mochilrela
Inchrysafin
Taufifkolamp
Inchrysafin
Ithtarseocar
Liadexnytu
Emounbata
Risenfizin
Lingsicufso
Wesgedisga
Walllinknafi
Ntoliphunun
Compcrisanic
Breakuninsi
Diodselpectmi
Osberpontspar
Stovulodfor
Pagebufchurch
Enidcorra
Amabibra
Ogalroran
Coliparpi
Abrelanfi
Centschafquimi
Fahrmennite
Vegusnogas
Erinfanle
Esindufo
Stonwithddilans
Werpsofcevers
Workknothmigua
Natucdecon
Brokaluntas
Pasvesufru
Siobufneico
Thyjumpconrio
Hallsimplosa
Buepalrica
Heiskilyltin
Terpmaconpo
Tingnetpvoca
Feforconsbo
Lieworrugif
Rayzavagen
Hadhertscorjui
Rietautave
Mapenquini
Toothfvanchildsitt
Torrnumpvadis
Sonmumoco
Lyakunegbi
Mulquevibubb
Illumonshy
Tyouzarwieclean
Quenusidich
Palightate
Intespeonet
Gobsscufdore
Mindradegtio
Churchspicopbi
Fetasibi
Jaatbegverback
Tasemeto
Outpiranoun
Greatrescamo
Esaqanfo
Irounourge
Rawealthtranor
Potenniter
Berlinewild
Winbybasneu
Litchchipversduff
Nopnomasporb
Sisetmissreapp
Beachbigebu
Quireamalib
Tomicneime
Molattcaci
Mirciabenchmo
Marrabugsi
Desbackvogtxa
Ezenenna
Heurotisen
Latoukenntwan
Kookshopsubar
Outitdenpai
Patrocitcons
Stocliekursness
Mapeldami
Rockbalhauha
Reilowose
Rikamkoma
Esagorex
Champxycquesul
Saumahtaking
Pildomedig
Unalroiven
Alflexuabros
Roiwronusel
Linklatabby
Propgeltefa
Projonaget
Trumsemasin
Errabketi
Atlobfitan
Quibitketua
Icamusin
Ellimevit
Saducnitof
Rhinkasihilt
Ringcutithe
Noineutruthmon
Posmadhchupa
Lalasina
Unhiecropur
Cremchisisttor
Hinkplesulal
Inanemta
Trempuletcont
Ciotperunan
Warscidoro
Mitlurado
Esbeautegking
Poitemlearnfun
Nessspecarel
Comvicongsves
Congrangcavi
Pinkmefehou
Verthvorsweati
Montcapefun
Tenhephilca
Crocunolre
Ceiselistri
Bauhardphocir
Mbureneled
Buehornjuwhitt
Jauletvana
Mulrisoftgol
Benzsenripe
Kowsmolici
Raythreatuniv
Inmihita
Quimopavab
Buyflaverav
Fedsightygis
Vicuttmicpang
Sieverberfcum
Roctulidi
Coultclearesal
Losearchnene
Mingcotohot
Copaposfull
Guckhorbohead
Provavedep
Neybalora
Icihymre
Ciouspokcotme
Emrasheartve
Ticacsosing
Proctireli
Serescabou
Alitrecte
Jetsfreedlumtu
Onunapin
Lunbewape
Vieclinecha
Schafroseerfund
Tumbbivaculp
Edinvildows
Tiulialessma
Vocababe
Keeritorster
Tiohosbioney
Prosavfalbull
Childdigtare
Protvitimi
Wornitihell
Premdownfumbfolg
Mortconpretmitt
Biochalocomp
Ousitxuacas
Osarinlas
Tersleliro
Laidenanfai
Schulmauwhemi
Diewylphovi
Consbudguijo
Downconhucan
Buytifbaper
Keeconolo
Hearmelyrea
Clinesilab
Ricutriicart
Bertjeftiddcan
Mosvafelo
Tincostsicing
Liastabalwor
Glanivvabke
Caelodeda
Lowbraliten
Prefilburge
Sonnfeshersgung
Grotobadcrac
Scaracatig
Cohaforra
Ticebeatda
Dieeproxrona
Thesdiadimto
Khanasywhout
Rumbtowhiti
Igarelwei
Nedetherbu
Preanexfervi
Senselado
Insomitji
Rantjumpvapo
Tiesimonsbelt
Kagafited
Ronleamimy
Bountermmiwo
Peakcheadtimmtos
Biocritmatre
Trovdistpilis
Neytradcabo
Rienimoten
Carotopa
Ducsemosi
Chaouruscico
Micelwacons
Hissohoci
Curbivenes
Whomoguater
Svilenalis
Viwinshyfunc
Ertraclandto
Tjosispiohi
Erdefangui
Paachengkatzted
Petviefinmo
Prefesidbo
Clapjiggkeher
Laumatbofa
Evelemne
Unuldaupo
Nonisovi
Tictuacichi
Kicktatoume
Nivennitan
Figteirunca
Haicalsoivent
Tactsapapen
Capmeifondnic
Oupcalrecho
Mullamaround
Weibechama
Acusexad
Hiwanacsvid
Vsetdesversfootb
Antocorno
Rosawannai
Boascamlinktos
Remoundaphos
Tipsjusmaihal
Apopongeb
Eclontaiho
Nonschopswalmo
Elamgazakh
Chareadecbea
Steerdalpuymaa
Nvespeasynob
Pewidcecar
Lifurdaypitt
Enpodsine
Nicmofimag
Blutfonferen
Cicvetugma
Fuwarransma
Ratanacu
Scurcuningmel
Payraconcstag
Ryckratizga
Belsbigroca
Boozfiderche
Badcfenrames
Miwordcourma
Poceduni
Migmietyce
Solabising
Flamgatmori
Nenbmegumal
Breathinschenaq
Neulebichen
Quocraxsipuk
Lsolunrelfi
Dernobooser
Trotrithtiva
Tuomitota
Haumenthisfspic
Icbeabuci
Trolugegun
Avsullafor
Fauspeechulov
Lubtidecca
Centrannordti
Ghibaruppop
Wiggmecorvoi
);
my %spammer;
foreach my $s (@spam) {
if ($s) {
$spammer{$s}++
}
}
return(%spammer);
}
# Structure of the config file:
# following assumes the wiki tables are all in the 'wiki' database and
# that the account 'archive' has SELECT capabilities for all those tables
# [database]
# database = wiki
# username = archive
# password = ..........
#
# [webserver]
# documentroot = /var/www/techrights.org/htdocs
# subdirectory = /wiki
Generator/tr-generate-gemtext-index.sh
#!/bin/sh PATH=/usr/local/bin:/usr/bin:/bin h=/home/gemini/techrights.org/ conf=/etc/tr-initialize-static-site-generator.conf if ! test -d $h; then if ! mkdir -p $h; then echo "Could not make '$h'" exit 1 fi echo "Created directory '$h'" fi if ! test -w $h; then echo "Could not open '$h' for writing" exit 1 fi cat $h/index.template > $h/index.gmi date +"# Recent Posts as of %b %e, %Y%n" >> $h/index.gmi tr-generate-feed.pl \ -c $conf \ -g -n 15 \ >> $h/index.gmi echo >> $h/index.gmi cat <<EOT >> $h/index.gmi ## Additional Information => /feed.xml Atom Feed for this Gemini capsule EOT cat $h/hitclock >> $h/index.gmi exit 0
Generator/tr-ssh-wrapper.pl
#!/usr/bin/perl -T
use URI;
use English;
use strict;
use warnings;
# Make %ENV safer
delete @ENV{qw(IFS CDPATH ENV BASH_ENV)};
# assign PATH explicitly
local $ENV{PATH} = "/bin:/usr/bin:/usr/local/bin";
my $option = $ENV{'SSH_ORIGINAL_COMMAND'};
if (!$option) {
exit(1);
}
if ($option =~ m/^new$/i
|| $option =~ m/^add$/i ) {
exec("/usr/local/bin/add-and-refresh-from-db.sh");
} elsif ($option =~ m/^update\s+/) {
my ($url) = ($option =~ m/\s+(\S+)$/);
my $uri = URI->new($url)
or die();
my $scheme = $uri->scheme
or die();
my $host = $uri->host
or die();
my $path = $uri->path
or die();
if ($scheme ne 'http'
&& $scheme ne 'https' ){
die;
}
if ($host ne 'techrights.org'
&& $host ne 'www.techrights.org'
&& $host ne 'news.techrights.org') {
die;
}
my $documentroot = '/var/www/techrights.org/htdocs';
if (! -f "$documentroot/$path") {
die;
}
my $clean = "$scheme://$host$path";
exec('/usr/local/bin/update-and-refresh-from-db.sh',$clean);
}
exit(0);
Generator/tr-extract-posts-sql.pl
#!/usr/bin/perl
# See Git for history
# fetches posts from database and
# writes both XHTML and GemText versions in parallel
# to their default directories, for both drafts and
# finished posts.
# The default locations are overridden
# with -g or -x, or -dg or -dx
use utf8;
use Getopt::Long;
use Date::Calc qw(check_date Today);
use DBI qw(:sql_types);
use File::Path qw(make_path);
use URI::Escape;
use URI;
use Date::Calc qw(Date_to_Time);
use POSIX qw(strftime);
use HTML::TreeBuilder::XPath;
use HTML::Entities qw(encode_entities_numeric decode_entities);
use Encode; # decode is needed for HTML::TreeBuilder::XPath
use Capture::Tiny qw(capture);
use Config::Tiny;
use open qw(:std :encoding(UTF-8));
use English;
use strict;
use warnings;
if ($ENV{'USER'} eq 'root' && ! $ENV{'EUID'}) {
print STDERR qq(Cannot run as root!\nAborting\n);
exit(1);
}
my ($all,
$config,
$date,
$force,
$gemtext_path,
$gemtext_draft_path,
$help,
$since,
$unwritten,
$xhtml_path,
$xhtml_draft_path,
) = ('') x 11;
our $VERBOSE = 0;
GetOptions ("all" => \$all,
"config|c=s" => \$config,
"date|d=s" => \$date,
"force" => \$force,
"gemini:s" => \$gemtext_path,
"draft-gemini:s" => \$gemtext_draft_path,
"help" => \$help,
"since" => \$since,
"unwritten" => \$unwritten,
"xhtml:s" => \$xhtml_path,
"draft-xhtml:s" => \$xhtml_draft_path,
"verbose+" => \$VERBOSE,
);
my ( $script ) = ( $0 =~ m|(?!.*/)(.*)\.[^\.]*$| );
if ($help) {
my $err = 0;
&usage($script, 'sample.conf', $err);
}
if (! $config) {
warn("Provide configuration file via the -c option.\n");
my $err = 1;
usage($script, 'sample.conf', $err);
}
if (! -f $config) {
my $err = 1;
warn("Provide configuration file via the -c option.\n");
&usage($script, $config, $err);
} elsif (! -r $config) {
die("Configuration file '$config' is not readable\n");
}
my $configuration = Config::Tiny->read($config)
or die("Could not read configurationn file '$config': $!\n");
my $dbname = $configuration->{database}->{name}
or die("Database name missing from configuration file\n");
my $documentroot = $configuration->{webserver}->{documentroot}
or die("DocumentRoot missing from configuration file\n");
my $serverroot = $configuration->{webserver}->{serverroot}
or die("ServertRoot missing from configuration file\n");
my $geminiroot = $configuration->{gemini}->{geminiroot}
or die("GeminiRoot missing from configuration file\n");
if (! $xhtml_path) {
$xhtml_path = $documentroot . "/n/";
}
if (! $xhtml_draft_path) {
$xhtml_draft_path = $documentroot . "/drafts/";
}
if (! $gemtext_path){
$gemtext_path = $geminiroot . "/n/";
}
if (! $gemtext_draft_path) {
$gemtext_draft_path = $geminiroot . "/drafts/";
}
my $dbfile = $serverroot . '/db/'. $dbname;
&prepare_paths($xhtml_path, $xhtml_draft_path,
$gemtext_path, $gemtext_draft_path);
my ($year, $month, $day) = &get_date($date);
if ($since) {
print "Starting Date: $year/$month/$day\n" if ($VERBOSE);
} else {
print "Date: $year/$month/$day\n" if ($VERBOSE);
}
my $dbh = DBI->connect("dbi:SQLite:dbname=$dbfile", undef, undef,
{ AutoCommit => 0, RaiseError => 0 })
or die("Could not open database '$dbfile': $!\n");
$dbh->sqlite_busy_timeout(10000); # milliseconds to wait for locks
# three tries at opening the database for exclkusive writing
my $count = 3;
while ($count--) {
my ($stdout, $stderr, @result)
= capture { $dbh->do('PRAGMA locking_mode = EXCLUSIVE'); };
if (! shift @result) {
print STDERR qq($count: $script trying to get database lock\n);
if (!$count) {
die("Could not get lock for '$dbfile': $!\n");
}
}
}
# drafts must come first because some may become finalized posts
&move_finished_drafts($dbh);
&extract_and_write_drafts($dbh);
&extract_and_write_posts($dbh, $year,$month,$day,
$force, $all, $since, $unwritten);
$dbh->disconnect;
exit(0);
sub usage {
my ($script, $config, $error) = @_;
print <<"EOU";
USAGE:
$script -c config [-ahfsuv] [-d date] [-g path] [-x path]
-a, --all extract all records regardless of other settings
-c, --config path to configuration file
-d, --date date as YYYYMMDD, defaults to today if missing
-f, --force force all files, written or unwritten
-g, --gemini override default destination path for GemText
--draft-gemini override default destination for GemText drafts
-s, --since also include all posts since the given date
-u, --unwritten extract all unwritten records
-x, --xhtml override default destination path for XHTML
--draft-xhtml override default destination for XHTML drafts
-v, --verbose show debugging info
-h, --help show this message
By default, only records which have not been extracted yet will be written. This can be overriden with the -f option. The -g and -x options can each be used to point to other paths and override the defaults.
Drafts are stored elsewhere. The -dg and -dx options override the
default draft locations.
The -a and the -u options are mutually exclusive and -a takes precedence.
EOU
if ($config eq 'sample.conf') {
print "\nProvide a configuration file, ";
} else {
print "\nLooking for config file in '$config',\n";
}
print <<"EOC";
for example:
[database]
name = tr-static-site-generator.sqlite3
images = tr-static-site-generator-img.sqlite3
[gemini]
geminiroot = /home/gemini/site1.example.org/
[webserver]
documentroot = /var/www/site1.example.org/htdocs
serverroot = /var/www/site1.example.org/
EOC
if ($error) {
exit(1);
}
exit(0);
}
sub prepare_paths {
my ($xhtml_path, $xhtml_draft_path,
$gemtext_path, $gemtext_draft_path) = @_;
$gemtext_path = &get_path($gemtext_path);
&prepare_directory($gemtext_path);
if ($VERBOSE > 1) {
print qq(GemText Path = $gemtext_path\n);
}
$xhtml_path = &get_path($xhtml_path);
&prepare_directory($xhtml_path);
if ($VERBOSE > 1) {
print qq(XHTML Path = $xhtml_path\n);
}
$gemtext_draft_path = &get_path($gemtext_draft_path);
&prepare_directory($gemtext_draft_path);
if ($VERBOSE > 1) {
print qq(Draft GetText Path = $gemtext_draft_path\n);
}
$xhtml_draft_path = &get_path($xhtml_draft_path);
&prepare_directory($xhtml_draft_path);
if ($VERBOSE > 1) {
print qq(Draft XHTML Path = $xhtml_draft_path\n);
}
return(1);
}
sub get_path {
my ($p) = @_;
$p = '' if (!defined($p)); # options could start undef
$p =~ s|(?<!/)$|/|; # add a trailing slash, if needed
my $path = '';
if ($p) {
my @directories = reverse(split(m/\//, $p));
my @canonical_path = ();
while (@directories) {
my $dir = shift @directories;
if (!length($dir)) {
next;
}
if ($dir eq ".") {
next;
}
if ($dir eq "..") {
shift @directories;
next;
}
push @canonical_path, $dir;
}
$path = '/'.join("/", reverse @canonical_path);
if ($path eq '/') {
die("The directory '$path' is not acceptable\n");
}
if ($path !~ m|/$|) {
$path = $path . '/';
}
}
if (-d $path) {
if (-w $path) {
return($path);
} else {
die("The directory '$path' is not writable\n");
}
} elsif (-e $path) {
die("The destination '$path' is not a directory\n");
} else {
die("The directory '$path' does not exist\n");
}
die("Could not work out a path\n");
}
# validate and return date from option XOR current date
sub get_date {
my ($d) = @_;
my ($year, $month, $day);
my $date = '';
if ($d) {
if ( ($date) = ($d =~ m/^([0-9]{4}-[0-9]{2}-[0-9]{2})$/)
or
($date) = ($d =~ m/^([0-9]{4}[0-9]{2}[0-9]{2})$/)
) {
$date =~ s/-//g;
}
if (!$date) {
print STDERR qq(Invalid date '$d'\n);
exit(1);
}
($year,$month,$day) =
($date =~ m/^([0-9]{4})([0-9]{2})([0-9]{2})$/);
if (! check_date($year,$month,$day)) {
print STDERR qq(Invalid date '$date'\n);
exit(1);
}
}
if (!$date) {
($year,$month,$day) = Today(1); # get date GMT
$year = sprintf("%04d", $year);
$month = sprintf("%02d", $month);
$day = sprintf("%02d", $day);
}
return($year, $month, $day);
}
sub extract_and_write_posts {
my ($dbh, $year,$month,$day, $force, $all, $since, $unwritten) = @_;
my $draft_status = 0;
my $date = "$year-$month-$day";
# make an extended set for titles and links outside the written set
my %full_list = ();
# choose among option a, u, f, or s
my $sth = &initial_query_to_get_posts_to_publish($dbh, $date, $force,
$all, $since, $unwritten);
# list of records to publish, excluding extra prev and next
my %record = ();
my $lowest = 0;
my $highest = 0;
while (my $data = $sth->fetchrow_hashref) {
my $recno = $data->{'recno'};
if (!$lowest) {
$lowest = $recno;
}
$highest = $recno;
$record{$recno}{'slug'} = decode('UTF-8', $data->{'slug'});
$record{$recno}{'ballast'} = $data->{'ballast'};
$record{$recno}{'date'} = $data->{'date'};
$record{$recno}{'written'} = $data->{'written'};
$full_list{$recno}{'slug'} = $data->{'slug'};
$full_list{$recno}{'ballast'} = $data->{'ballast'};
$full_list{$recno}{'date'} = $data->{'date'};
$full_list{$recno}{'written'} = $data->{'written'};
}
$sth->finish;
if ($VERBOSE) {
print "HI: $highest\nLOW: $lowest\n";
}
# get the metadata for the first record before the retreived set
if ($lowest) {
my ($prev, $date, $slug, $ballast, $written)
= &prev_recno($dbh,$lowest);
if ($prev) {
$record{$prev}{'date'} = $date;
$record{$prev}{'slug'} = decode('UTF-8', $slug);
$record{$prev}{'ballast'} = $ballast;
$record{$prev}{'written'} = $written;
($prev, $date, $slug, $ballast, $written)
= &prev_recno($dbh, $prev);
if ($prev) {
$full_list{$prev}{'date'} = $date;
$full_list{$prev}{'slug'} = $slug;
$full_list{$prev}{'ballast'} = $ballast;
$full_list{$prev}{'written'} = $written;
}
}
}
# get the metadata for the next record after the retrieved set
if ($highest) {
my ($next, $date, $slug, $ballast, $written, $status)
= &next_recno($dbh, $lowest);
if ($next) {
$record{$next}{'date'} = $date;
$record{$next}{'slug'} = decode('UTF-8', $slug);
$record{$next}{'ballast'} = $ballast;
$record{$next}{'written'} = $written;
($next, $date, $slug, $ballast, $written)
= &next_recno($dbh, $next);
if ($next) {
$full_list{$next}{'date'} = $date;
$full_list{$next}{'slug'} = $slug;
$full_list{$next}{'ballast'} = $ballast;
$full_list{$next}{'written'} = $written;
}
}
}
# cache previous/next data for each record in the set
for my $recno (sort {$a <=> $b} keys %record) {
my ($prev, $next, $date, $slug, $ballast, $written, $status);
($next, $date, $slug, $ballast, $written) =
&next_recno($dbh, $recno);
if ($next) {
$full_list{$recno}{'next'} = $next;
$full_list{$next}{'date'} = $date;
$full_list{$next}{'slug'} = decode('UTF-8', $slug);
$full_list{$next}{'ballast'} = $ballast;
$full_list{$next}{'written'} = $written;
}
($prev, $date, $slug, $ballast, $written) =
&prev_recno($dbh, $recno);
if ($prev) {
$full_list{$recno}{'prev'} = $prev;
$full_list{$prev}{'date'} = $date;
$full_list{$prev}{'slug'} = decode('UTF-8', $slug);
$full_list{$prev}{'ballast'} = $ballast;
$full_list{$prev}{'written'} = $written;
}
}
# third cycle: is this necessary? can title be collected earlier?
$sth = $dbh->prepare('SELECT metadata.value
FROM metadata
WHERE metadata.term="dc.title"
AND metadata.recno=?');
for my $recno (sort {$a <=> $b} keys %full_list) {
$sth->execute($recno) or die();
my $rec = $sth->fetchrow_hashref;
my $title = $rec->{'value'};
$title = encode_entities_numeric(decode_entities($title), '&');
$title = decode('UTF-8', $title);
$full_list{$recno}{'title'} = $title;
$sth->finish;
}
if (!%record) {
print "No records or no unwritten records.\n\n";
return(0);
}
# it's probably faster to write both types than to track both separately
for my $recno (sort {$a <=> $b} keys %record) {
my $slug = $full_list{$recno}{'slug'};
my $ballast = $full_list{$recno}{'ballast'};
my $date_created = $full_list{$recno}{'date'} ||
die("Missing dc.date.created : $recno\n");
$date_created =~ s|^([0-9]{4})([0-9]{2})([0-9]{2})$|$1/$2/$3|;
if (-d $xhtml_path) {
# http / https
my $xhtml = &generate_xhtml($recno, $draft_status,
\%full_list);
&write_xhtml($dbh, $recno, "$xhtml_path$date_created/",
$slug, $ballast, $xhtml, 0);
} else{
warn ("Problem with '$xhtml_path', nothing written\n");
return(0);
}
if (-d $gemtext_path) {
# gemini
my $gemtext = &generate_gemtext($recno, $draft_status,
\%full_list);
&write_gemtext($recno, "$gemtext_path$date_created/",
$slug, $ballast, $gemtext, 0);
} else{
warn ("Problem with '$gemtext_path', nothing written\n");
return(0);
}
}
return(1);
}
sub initial_query_to_get_posts_to_publish {
my ($dbh, $date, $force, $all, $since, $unwritten) = @_;
# $sth Statement handle object
my $sth;
my $query;
if ($force && $all) {
$query = qq(SELECT keys.recno,keys.date,slug,
ballast,written
FROM keys
WHERE keys.recno>=1
GROUP BY keys.recno
ORDER BY keys.recno ASC);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute()
or die "execute statement failed: $dbh->errstr()\n";
} elsif ($force && $since) {
$query = qq(SELECT keys.recno,keys.date,keys.slug,
keys.ballast,keys.written
FROM keys
INNER JOIN metadata
ON keys.recno = metadata.recno
AND ( metadata.term="dc.date.modified"
OR
metadata.term="dc.date.created" )
AND substr(metadata.value,1,10)>=?
GROUP BY keys.recno
ORDER BY keys.recno ASC);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute($date)
or die "execute statement failed: $dbh->errstr()\n";
} elsif($force) {
$query = qq(SELECT keys.recno,keys.date,keys.slug,
keys.ballast,keys.written
FROM keys
INNER JOIN metadata
ON keys.recno = metadata.recno
AND ( metadata.term="dc.date.modified"
OR
metadata.term="dc.date.created" )
AND substr(metadata.value,1,10)=?
GROUP BY keys.recno
ORDER BY keys.recno ASC);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute($date)
or die "execute statement failed: $dbh->errstr()\n";
} elsif ($all) {
$query = qq(SELECT keys.recno,keys.date,slug,
ballast,written
FROM keys
WHERE keys.recno>=1
AND written=0
GROUP BY keys.recno
ORDER BY keys.recno ASC);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute()
or die "execute statement failed: $dbh->errstr()\n";
} elsif ($unwritten) {
$query = qq(SELECT keys.recno,keys.date,slug,ballast,
written
FROM keys
WHERE keys.recno>=1
AND written=0
GROUP BY keys.recno
ORDER BY keys.recno ASC);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute()
or die "execute statement failed: $dbh->errstr()\n";
} elsif ($since) {
$query = qq(SELECT keys.recno,keys.date,keys.slug,
keys.ballast,keys.written
FROM keys
INNER JOIN metadata
ON keys.recno = metadata.recno
AND ( metadata.term="dc.date.modified"
OR
metadata.term="dc.date.created" )
AND substr(metadata.value,1,10)>=?
WHERE written=0
GROUP BY keys.recno
ORDER BY keys.recno ASC);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute($date)
or die "execute statement failed: $dbh->errstr()\n";
} else {
$query = qq(SELECT keys.recno,keys.date,keys.slug,
keys.ballast,keys.written
FROM keys
INNER JOIN metadata
ON keys.recno = metadata.recno
AND ( metadata.term="dc.date.modified"
OR
metadata.term="dc.date.created" )
AND substr(metadata.value,1,10)=?
WHERE written=0
GROUP BY keys.recno
ORDER BY keys.recno ASC);
$sth = $dbh->prepare($query)
or die "prepare statement failed: $dbh->errstr()\n";
$sth->execute($date)
or die "execute statement failed: $dbh->errstr()\n";
}
if ($VERBOSE > 1) {
print "Main Query= $query\n";
}
return($sth);
}
sub next_recno {
my ($dbh, $recno) = @_;
my $query = qq(SELECT recno, date, slug, ballast, written
FROM keys
WHERE recno >?
ORDER BY recno ASC LIMIT 1);
my $sth = $dbh->prepare($query)
or die();
$sth->execute($recno);
my ($next, $date, $slug, $ballast, $written) = (0) x 5;
if (my $record = $sth->fetchrow_hashref) {
$next = $record->{'recno'};
$date = $record->{'date'};
$slug = $record->{'slug'};
$ballast = $record->{'ballast'};
$written = $record->{'written'};
}
$sth->finish;
return($next, $date, $slug, $ballast, $written);
}
sub prev_recno {
my ($dbh, $recno) = @_;
my $query = qq(SELECT recno, date, slug, ballast, written
FROM keys
WHERE recno <?
ORDER BY recno DESC LIMIT 1);
my $sth = $dbh->prepare($query)
or die();
$sth->execute($recno);
my ($prev, $date, $slug, $ballast, $written) = (0) x 5;
if (my $record = $sth->fetchrow_hashref) {
$prev = $record->{'recno'};
$date = $record->{'date'};
$slug = $record->{'slug'};
$ballast = $record->{'ballast'};
$written = $record->{'written'};
}
$sth->finish;
return($prev, $date, $slug, $ballast, $written);
}
sub generate_xhtml {
my ($recno, $draft_status, $_data) = @_;
my %data = %{$_data};
if ($VERBOSE) {
print "Generating XHTML $recno\n";
}
my ($head, $title, $author, $date_created, $date_modified) =
&fetch_head($dbh, $recno);
$head = "<!-- $recno -->\n".$head;
my $prev_link = qq(<a name="prev">previous</a>);
if ($data{$recno}{'prev'}) {
my $prev = $data{$recno}{'prev'};
my $date = $data{$prev}{'date'};
my $title = $data{$prev}{'title'};
my $url = '';
if ($date) {
$date =~ s|^([0-9]{4})([0-9]{2})([0-9]{2})$|$1/$2/$3|;
my $slug = $data{$prev}{'slug'};
my $ballast = $data{$prev}{'ballast'};
if ($ballast) {
$url = "/n/$date/$slug.$ballast.shtml";
} else {
$url = "/n/$date/$slug.shtml";
}
} else {
die("Missing date\n");
}
$prev_link = qq(<a href="$url" title="previous">$title</a>);
$head = $head.qq( <link rel="prev" href="$url" />\n);
}
my $next_link = qq(<a name="next">next</a>);
if ($data{$recno}{'next'}) {
my $next = $data{$recno}{'next'};
my $date = $data{$next}{'date'};
my $title = $data{$next}{'title'};
my $url = '';
if ($date) {
$date =~ s|^([0-9]{4})([0-9]{2})([0-9]{2})$|$1/$2/$3|;
my $slug = $data{$next}{'slug'};
my $ballast = $data{$next}{'ballast'};
if ($ballast) {
$url = "/n/$date/$slug.$ballast.shtml";
} else {
$url = "/n/$date/$slug.shtml";
}
} else {
die("Missing date\n");
}
$head = $head.qq( <link rel="next" href="$url" />\n);
$next_link = qq(<a href="$url" title="next">$title</a>);
}
# print $head,"\n";
my $pdate = &pdate($date_created);
if ($date_modified gt $date_created) {
$pdate .= ",<br />\nupdated ".&pdate($date_modified);
}
my $body = &fetch_xhtml_body($dbh, $recno, $draft_status);
my $xhtml = &new_xhtml_document($title,$pdate,$author,
$prev_link,$next_link,$head,$body);
return($xhtml);
}
sub fetch_head {
my ($dbh, $recno, $draft_status) = @_;
my $title = '';
my $author = '';
my $date_created = '';
my $date_modified = '';
my @head = ();
my $query;
if ($draft_status) {
$query = qq(SELECT term,value FROM draft_metadata WHERE recno=?);
} else {
$query = qq(SELECT term,value FROM metadata WHERE recno=?);
}
my $sth = $dbh->prepare($query);
$sth->execute($recno) or die();
while (my $record = $sth->fetchrow_hashref) {
# print Dumper($record);
my $term = $record->{'term'};
my $value = decode('UTF-8', $record->{'value'});
$value =~ s/"/"/g;
if ($term eq 'dc.title') {
$title = $value;
push(@head, qq(<title>Techrights — $title</title>));
} elsif ($term eq 'dc.creator') {
$author = $value;
} elsif ($term eq 'dc.date.created') {
$date_created = $value;
} elsif ($term eq 'dc.date.modified') {
$date_modified = $value;
} elsif ($term eq 'slug') {
next;
}
push(@head, qq(<meta name="$term" content="$value" />));
}
my $head = " ".join("\n ", @head)."\n";
$sth->finish;
return($head, $title, $author, $date_created, $date_modified);
}
sub fetch_xhtml_body {
my ($dbh, $recno, $draft_status) = @_;
my $query;
if ($draft_status) {
$query = qq(SELECT body FROM draft_body WHERE recno=?);
} else {
$query = qq(SELECT body FROM body WHERE recno=?);
}
my $sth = $dbh->prepare($query);
$sth->execute($recno);
my $body = '';
while (my $record = $sth->fetchrow_hashref) {
$body = $record->{'body'};
}
$body = decode('UTF-8', $body);
$sth->finish;
return($body);
}
sub new_xhtml_document {
my ($title,$pdate,$author,$prevlink,$nextlink,$head,$post) = @_;
my $html = <<"EOHTML";
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
$head
<link rel="stylesheet" href="/CSS/techrights.2026.css"
media="screen" type="text/css" />
<link rel="alternate" type="application/rss+xml" href="/feed.xml"
title="Techrights" />
<link rel="shortcut icon" href="/favicon.ico" type="image/x-icon" />
</head>
<body>
<!--#include virtual="/header.html"-->
<!--#include virtual="/feeds.html"-->
<div class="post">
<!--#include virtual="/navigation.html"-->
<div class="navigation2">
<ul>
<li>$prevlink</li>
<li>$nextlink</li>
</ul>
</div>
<h1>$title</h1>
<p class="author">posted by $author on $pdate<br /></p>
$post
</div>
<div class="navigation2">
<ul>
<li>$prevlink</li>
<li>$nextlink</li>
</ul>
</div>
<h1>Other Recent Techrights' Posts</h1>
<!--#include virtual="/latest-news.html"-->
<div class="navigation2">
<ul>
<li>$prevlink</li>
<li>$nextlink</li>
</ul>
</div>
<!--#include virtual="/footer.html"-->
</body>
</html>
EOHTML
return($html);
}
sub write_xhtml {
my ($dbh, $recno, $path, $slug, $ballast, $xhtml, $draft) = @_;
if (! &prepare_directory($path)) {
return(0);
}
my $file;
if ($ballast) {
$file = "$path$slug.$ballast.shtml";
} else {
$file = "$path$slug.shtml";
}
print " Fx: $file\n" if ($VERBOSE);
my $doc;
# $xhtml = decode('UTF-8', $xhtml);
open($doc, '>', $file)
or die("Could not open '$file' for writing: $!\n");
print $doc $xhtml;
close($doc);
my $query;
if (!$draft) {
$query = qq(UPDATE keys
SET written=1
WHERE recno =?);
} else {
$query = qq(UPDATE draft_keys
SET written=1
WHERE recno =?);
}
if ($VERBOSE > 2) {
print "Update recno = $recno\n";
print "Update query = $query\n";
print "Update dbfile = '$dbfile'\n";
}
my $sth;
$sth = $dbh->prepare($query)
or die($sth->errstr."\n");
$sth->execute($recno)
or die($sth->errstr."\n");
$dbh->commit;
$sth->finish;
return(1);
}
sub prepare_directory {
my ($path) = @_;
if ( -e $path) {
if ( ! -d $path) {
warn "Target already exists but is not a directory: '$path'\n";
return(0);
}
if ( ! -w $path) {
print STDERR "Target is not a writable: '$path'\n";
return(0);
}
# path exists and is writable
return(1);
} else {
make_path($path,{mode => 0775})
or die("Could not create path '$path' : $!\n");
print "Created directory '$path'\n" if ($VERBOSE);
return(1);
}
}
sub pdate {
my ($date) = @_;
my ($pub_year,$pub_month,$pub_day) =
( $date =~ m/^([0-9]{4})-([0-9]{2})-([0-9]{2})T.*$/);
my $pub_date = Date_to_Time($pub_year, $pub_month, $pub_day, 0, 0, 0);
my $pdate = strftime("%b %d, %Y", gmtime($pub_date));
return($pdate);
}
sub generate_gemtext {
my ($recno, $draft_status, $_data) = @_;
my %data = %{$_data}; # hash as next parameter
my $gemtext = '';
if ($VERBOSE) {
print "Writing GemText $recno\n";
}
my (undef, $title, $author, $date_created, $date_modified) =
&fetch_head($dbh, $recno);
my $prev_link = '';
if ($data{$recno}{'prev'}) {
my $prev = $data{$recno}{'prev'};
my $date = $data{$prev}{'date'};
my $title = $data{$prev}{'title'};
$title = decode_entities($title);
# $title = decode('UTF-8', $title);
my $url = '';
if ($date) {
$date =~ s|^([0-9]{4})([0-9]{2})([0-9]{2})$|$1/$2/$3|;
my $slug = $data{$prev}{'slug'};
my $ballast = $data{$prev}{'ballast'};
if ($ballast) {
$url = "/n/$date/$slug.$ballast.gmi";
} else {
$url = "/n/$date/$slug.gmi";
}
} else {
die("Missing date\n");
}
# $title = decode('UTF-8', $title);
# $url = decode('UTF-8', $url);
$prev_link = qq(=>\t$url\t$title);
}
my $next_link = '';
if ($data{$recno}{'next'}) {
my $next = $data{$recno}{'next'};
my $date = $data{$next}{'date'};
my $title = $data{$next}{'title'};
$title = decode_entities($title);
# $title = decode('UTF-8', $title);
my $url = '';
if ($date) {
$date =~ s|^([0-9]{4})([0-9]{2})([0-9]{2})$|$1/$2/$3|;
my $slug = $data{$next}{'slug'};
my $ballast = $data{$next}{'ballast'};
if ($ballast) {
$url = "/n/$date/$slug.$ballast.gmi";
} else {
$url = "/n/$date/$slug.gmi";
}
} else {
die("Missing date\n");
}
# $title = decode('UTF-8', $title);
# $url = decode('UTF-8', $url);
$next_link = qq(=>\t$url\t$title);
}
my $pdate = &pdate($date_created);
if ($date_modified gt $date_created) {
$pdate .= ",\nupdated ".&pdate($date_modified);
}
my $body = &fetch_xhtml_body($dbh, $recno, $draft_status);
$body = &xhtml_to_gemtext($body);
$title = decode_entities($title);
$gemtext = &new_gemtext_document($title,$pdate,$author,
$prev_link,$next_link,
$body);
return($gemtext);
}
sub xhtml_to_gemtext {
my ($post) = @_;
my $xhtml = HTML::TreeBuilder::XPath->new;
$xhtml->implicit_tags(1);
$xhtml->no_space_compacting(0);
$xhtml->parse($post)
or die("Could not parse post content : $!\n");
my %prefix = (
'h1' => "# ",
'h2' => "## ",
'h3' => "### ",
'h4' => "### ",
'h5' => "### ",
'h6' => "### ",
);
for my $pre ($xhtml->findnodes('//pre')) {
# try to address conversion of PRE elements
my $text = $pre->as_text;
$text =~ s/^/> /gms;
$pre->delete_content;
$pre->push_content($text);
}
# blockquotes, flatten to one layer because that's what gemtext handles
for my $blockquote ($xhtml->findnodes('//blockquote')) {
# try to address conversion of BLOCKQUOTE elements
my $new_bq = HTML::Element->new('~literal');
# check all nodes
for my $node ($blockquote->content_list) {
my $new_node = HTML::Element->new('~literal', text=> '');
# iterate through nodes under the blockquote
# and add them to the new structure
if (! ref($node)) {
# skip it is a blank line
if ($node =~ m/^\s+$/ms ) {
next;
}
$new_node->push_content('> ' . $node . "\n\n");
} else {
# if the node contains an element or hierarchy
$new_node->push_content('> ' . $node->as_text . "\n\n");
for my $anchor ($node->findnodes('.//a[@href]')) {
my $link = &gemtext_link($anchor);
$new_node->push_content($link."\n");
}
$new_node->push_content("\n\n");
}
$new_bq->push_content($new_node);
}
for my $anchor ($blockquote->findnodes('.//a[@href]')) {
my $link = &gemtext_link($anchor);
$new_bq->push_content($link."\n");
}
$blockquote->delete_content;
$blockquote->push_content($new_bq->as_text);
}
# replace images with links to alt text or titles
for my $anchor ($xhtml->findnodes("//a[img]")) {
my $tmp = HTML::Element->new('~literal');
for my $img ($anchor->findnodes("./img")) {
my $title;
if (defined($img->attr('src'))) {
my $src = $img->attr('src');
my $text = $img->attr('alt') || $img->attr('title') || '';
my $u = URI->new_abs($src, 'https://techrights.org/');
my $url = $u->canonical;
my $link = '';
my $external = '';
my ($scheme, $host) =
($url =~ m|^(\w+):/+([^/][\w\d\+\-\.]+)|);
if (!$host) {
$host = '';
}
if ($host !~ m/techrights\.org$/) {
$external = '↺ ';
}
if ($text) {
if ($url !~ m/^gemini:/) {
# gemini is not in URI module
my $s = ' '.uc($u->scheme).' ' || '';
$link = qq(\n=>\t$url\t).
$external.$s.
qq(image: $text\n);
} else {
$link = qq(\n=>\t$url\t).$external.qq(image: $text\n);
}
} else {
if ($url !~ m/^gemini/) {
# gemini is not in URI module
my $s = uc($u->scheme).' ' || '';
$link = qq(\n=>\t$url\t).$external.qq(unlabeled ).
$s.qq(image\n);
} else {
$link = qq(\n=>\t$url\t).$external
.qq(unlabeled image\n);
}
}
$tmp->push_content($link);
}
}
$anchor->replace_with($tmp);
}
my $tmp = HTML::Element->new('~literal');
for my $img ($xhtml->findnodes('//img[@alt]')) {
my $alt;
if (defined($img->attr('alt')) && $img->attr('alt')) {
$alt = "\n> " . $img->attr('alt');
$tmp->push_content($alt);
$img->replace_with($tmp);
}
}
# format headings, plus any links they might contain
foreach my $hn (1 .. 5) {
$hn = qq(h$hn);
for my $heading ($xhtml->findnodes(".//$hn")) {
my $h = "";
if (defined($prefix{$hn})) {
$h .= $prefix{$hn};
}
$h = qq(\n).$h.$heading->as_text.qq(\n\n);
my $tmp = HTML::Element->new('~literal');
$tmp->push_content($h);
for my $anchor ($heading->findnodes('./a[@href]')) {
my $link = &gemtext_link($anchor);
$tmp->push_content($link."\n");
}
$tmp->push_content("\n");
$heading->replace_with($tmp);
}
}
# ordered lists, only one layer deep
for my $ol ($xhtml->findnodes('//ol')) {
my $item = 1;
for my $li ($ol->findnodes('./li')) {
my $href ='';
my $new_li = HTML::Element->new('~literal');
$new_li->push_content("* $item ".$li->as_text."\n\n");
for my $anchor ($li->findnodes('./a[@href]')) {
my $link = &gemtext_link($anchor);
$new_li->push_content($link."\n");
}
$item++;
$li->replace_with($new_li);
}
$ol->push_content("\n");
}
# unordered lists, only one layer deep
for my $ul ($xhtml->findnodes('//ul')) {
for my $li ($ul->findnodes('./li')) {
my $new_li = HTML::Element->new('~literal');
my $listcontent = $li->as_text;
$listcontent =~ s/\s+$//gm;
$listcontent =~ s/^\s+//gm;
my $href ='';
$new_li->push_content('* '.$listcontent."\n");
for my $anchor ($li->findnodes('./a[@href]')) {
my $link = &gemtext_link($anchor);
$new_li->push_content($link."\n");
}
$li->replace_with($new_li);
}
$ul->push_content("\n");
}
# any remaining paragraphs
for my $pp ($xhtml->findnodes('//p')) {
my $href ='';
my $new_pp = HTML::Element->new('~literal');
my $as_text = $pp->as_text;
$as_text =~ s/^\s+//g;
$as_text =~ s/\s+$//g;
$new_pp->push_content($as_text."\n\n");
for my $anchor ($pp->findnodes('./a[@href]')) {
my $link = &gemtext_link($anchor);
$new_pp->push_content($link."\n");
}
$new_pp->push_content("\n");
$pp->replace_with($new_pp);
}
# any remaining links
for my $anchor ($xhtml->findnodes('//a[@href]')) {
my $new_anchor = HTML::Element->new('~literal');
my $link = &gemtext_link($anchor);
$new_anchor->push_content($link."\n\n");
$anchor->replace_with($new_anchor);
}
$post = $xhtml->as_text;
$xhtml->destroy;
while ($post =~ s/\n\n\n/\n\n/gms) { 1 }
while ($post =~ s/^\*\s+#/#/gms) { 1 }
return($post);
}
sub gemtext_link {
my ($anchor) = @_;
my $href = $anchor->attr('href');
my $text = $anchor->as_text;
chomp($text);
$text =~ s/^\s+//g;
if (defined($anchor->attr('class'))) {
if ($anchor->attr('class') eq 'readon') {
if (defined($anchor->attr('title'))) {
my $title = $anchor->attr('title') || 0;
if ($title) {
$text = "Read On: $title";
}
}
}
}
my $external = '';
my $u = URI->new_abs($href, 'https://techrights.org/');
my $url = $u->canonical;
$url =~ s{^https?://[^/]*techrights.org(/n.*)\.s?html}
{$1.gmi}x;
my ($scheme, $host) = ($url =~ m|^(\w+):/*([^/][\w\d\+\-\.]+)|);
if (!$host) {
$host = '';
}
if (!$scheme) {
$scheme = '';
}
if ($host && $host !~ m/techrights\.org$/) {
$external = '↺ ';
}
if ($scheme ne 'gemini') {
if ($scheme) {
$scheme = uc($scheme).': ';
}
$href = $url;
$text = $external.$scheme.$text;
} else {
if (!$external) {
# even the old relative links are in /n/ in Gemini
$href =~ s|^/o/([0-9]{4})/|/n/$1/|;
$href =~ s|\.s?html$|.gmi|;
} else {
$text = $external.$text;
}
$href = $url;
}
my $link = "=>\t$href\t$text";
return($link);
}
sub new_gemtext_document {
my ($title,$pdate,$author,$prevlink,$nextlink,$post) = @_;
$title =~ s/\n/ /gm;
$title =~ s/\s+/ /g;
my $gemtext = <<"EOGEMTEXT";
Techrights
# $title
Posted by $author on $pdate
$nextlink
$prevlink
$post
=> / gemini.techrights.org
EOGEMTEXT
return($gemtext);
}
sub write_gemtext {
my ($recno, $path, $slug, $ballast, $gemtext, $draft) = @_;
my $file;
if ($ballast) {
$file = "$path$slug.$ballast.gmi";
} else {
$file = "$path$slug.gmi";
}
if (! &prepare_directory($path)) {
return(0);
}
if (! &is_file_writable($file)) {
warn("'$slug' could not be written\n");
return(0);
}
print " Fg: $file\n" if ($VERBOSE);
my $doc;
# the $gemtext variable does not write out correctly to utf-8
# $gemtext = encode('UTF-8', $gemtext);
# open($doc, '>', $file)
# open($doc, '>:utf8', $file)
# $gemtext = encode('UTF-8', $gemtext);
open($doc, '>', $file)
or die("Could not open '$file' for writing: $!\n");
print $doc $gemtext;
close($doc);
return(1);
}
sub is_file_writable {
my ($file) = @_;
# overwrite by default
if (-e $file) {
if (-f $file) {
if (-w $file) {
return(1);
} else {
warn("Destination '$file' is not writable\n");
return(0);
}
} else {
warn("Destination '$file' is not a regular file\n");
return(0);
}
} else {
return(1);
}
}
sub move_finished_drafts {
my ($dbh) = @_;
my $query = qq(SELECT draft_keys.recno,draft_keys.date,draft_keys.slug,
draft_keys.ballast,draft_keys.written
FROM draft_keys
WHERE written=2
ORDER BY draft_keys.recno ASC);
my $sth = $dbh->prepare($query);
$sth->execute()
or die("\n");
while (my $data = $sth->fetchrow_hashref) {
my $draft_recno = $data->{'recno'};
my $date = $data->{'date'};
my $slug = $data->{'slug'};
my ($recno, $ballast) = &get_next_available_recno($dbh, $date,
$slug, 0);
$query = qq(INSERT INTO keys
SELECT ?,0,date,?,slug
FROM draft_keys
WHERE draft_keys.recno=?);
my $sth = $dbh->prepare($query);
eval {
$sth->execute($recno, $ballast, $draft_recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not update $draft_recno → $recno from draft '$query': $!\n");
}
my @queries= (
qq(INSERT INTO metadata
SELECT ?,term,value
FROM draft_metadata
WHERE draft_metadata.recno=?),
qq(INSERT INTO body
SELECT ?,body
FROM draft_body
WHERE draft_body.recno=?),
qq(INSERT INTO rawtext_body
SELECT ?,fulltext
FROM draft_rawtext
WHERE draft_rawtext.recno=?),
qq(INSERT INTO rawtext_metadata
SELECT ?, t1.value || ' ' || t2.value AS fulltext
FROM draft_metadata AS t1
JOIN draft_metadata AS t2
ON t2.recno = t1.recno
WHERE t1.term = "dc.title"
AND t2.term = "dc.description"
AND t1.recno = ?),
);
for my $query (@queries) {
my $sth = $dbh->prepare($query);
eval {
$sth->execute($recno, $draft_recno);
};
if($@) {
$dbh->rollback;
die("Could not update $draft_recno → $recno"
. " from draft '$query': $!\n");
}
$sth->finish;
}
@queries = (
qq(DELETE FROM draft_keys WHERE recno=?),
qq(DELETE FROM draft_metadata WHERE recno=?),
qq(DELETE FROM draft_body WHERE recno=?),
qq(DELETE FROM draft_rawtext WHERE recno=?),
);
for my $query (@queries) {
$sth = $dbh->prepare($query);
eval {
$sth->execute($draft_recno);
};
if($@) {
$dbh->rollback;
die("Could not delete draft '$query': $!\n");
}
$sth->finish;
}
# ballast == 0 for drafts, recno is in place of slug for drafts
&delete_draft_or_file($draft_recno, $xhtml_draft_path, $draft_recno,
0, 'shtml');
&delete_draft_or_file($draft_recno, $gemtext_draft_path, $draft_recno,
0, 'gmi');
}
$dbh->commit();
return(1);
}
sub extract_and_write_drafts {
my ($dbh) = @_;
my $draft_status = 1;
print " Draft XHTML Path: $xhtml_draft_path\n" if ($VERBOSE);
print " Draft GemText Path: $gemtext_draft_path\n" if ($VERBOSE);
my $query = qq(SELECT draft_keys.recno,draft_keys.date,draft_keys.slug,
draft_keys.ballast,draft_keys.written
FROM draft_keys
WHERE written=0
ORDER BY draft_keys.recno ASC);
my $sth;
$sth = $dbh->prepare($query)
or die($sth->errstr."\n");
$sth->execute()
or die($sth->errstr."\n");
# loop through the found records containing drafts
while (my $data = $sth->fetchrow_hashref) {
my $recno = $data->{'recno'};
my $slug = $data->{'slug'};
my $ballast = $data->{'ballast'};
my $date_created = $data->{'date'};
my $pdate = strftime("%b %d, %Y", gmtime());
# xhtml activities
if (-d $xhtml_draft_path) {
# http / https
my ($head, $title, $author, $date_created, $date_modified) =
&fetch_head($dbh, $recno, $draft_status);
$head = "<!-- $recno -->\n".$head;
my $body = &fetch_xhtml_body($dbh, $recno, $draft_status);
my $xhtml = &new_xhtml_document($title,$pdate,'draft',
'','',$head,$body);
&write_xhtml($dbh, $recno, $xhtml_draft_path, $recno, 0,
$xhtml, 1);
}
# gemtext activities
if (-d $gemtext_draft_path) {
# gemini
my ($head, $title, $author, $date_created, $date_modified) =
&fetch_head($dbh, $recno, $draft_status);
my $body = &fetch_xhtml_body($dbh, $recno, $draft_status);
$body = &xhtml_to_gemtext($body);
$title = decode_entities($title);
my $gemtext = &new_gemtext_document($title,$pdate,'draft',
'', '', $body);
&write_gemtext($recno, $gemtext_draft_path, $recno, 0,
$gemtext, 1);
}
}
$sth->finish;
return(1);
}
sub delete_draft_or_file {
my ($recno, $path, $slug, $ballast, $suffix) = @_;
my $file;
if ($ballast) {
$file = "$path/$slug.$ballast.$suffix";
} else {
$file = "$path/$slug.$suffix";
}
if ($VERBOSE > 1) {
print qq(Unlinking '$file'\n);
}
if (-f $file) {
if (unlink($file)) {
return(1);
} else {
warn("Could not unlink file '$file' : $!\n");
return(0);
}
}
}
sub update_dc_dates {
my ($dbh, $recno, $dc_date_created) = @_;
# DC.Date.Created and DC.Date.Modified
my $sth = $dbh->prepare('UPDATE metadata
SET value=?
WHERE recno=?
AND (
term="dc.date.created"
OR
term="dc.date.modified"
)');
eval {
$sth->execute($dc_date_created, $recno);
};
if($@) {
$sth->finish;
$dbh->rollback;
die("Could not adjust DC Dates in metadata table: $!\n");
}
$sth->finish;
$dbh->commit;
return(1);
}
sub get_next_available_recno {
my ($dbh, $date, $slug, $draft) = @_;
my $recno;
$date =~ s/T.*//;
$date =~ s/-//g;
my $sth;
if ($draft) {
$sth = $dbh->prepare('SELECT * FROM draft_keys WHERE date=? AND slug=?
ORDER BY ballast DESC LIMIT 1');
} else {
$sth = $dbh->prepare('SELECT * FROM keys WHERE date=? AND slug=?
ORDER BY ballast DESC LIMIT 1');
}
$sth->execute($date,$slug);
my $ballast = 0;
if (my $row = $sth->fetchrow_hashref) {
# same slug in use already, add ballast to it
$ballast = $row->{'ballast'} + 1;
$sth->finish;
# return(0);
}
# get the next draft or post record number
if ($draft) {
$sth = $dbh->prepare('SELECT max(recno) FROM draft_keys');
} else {
$sth = $dbh->prepare('SELECT max(recno) FROM keys');
}
$sth->execute();
my $row = $sth->fetch;
$recno = $row->[0] ? $row->[0]+1 : 1;
$sth->finish;
# print "Next record = $recno\n";
return($recno, $ballast);
}
Generator/tr-stats-weekly-pages.pl
#!/usr/bin/perl
# reads from stdin and writes to stdout
# processes Apache log files in their default formmat
# and counts which URLs have been accessed most
use Date::Calc qw(Time_to_Date Delta_Days Today Add_Delta_Days);
use Date::Parse;
use open qw(:std :utf8);
use Getopt::Long;
use IO::Interactive qw(is_interactive);
use strict;
use warnings;
our %opt = (
's' => 0,
'sorted' => 0,
'status' => 0,
'table' => 0,
'h' => 0,
'v' => 0,
);
GetOptions ("help|h" => \$opt{'h'},
"sorted" => \$opt{'sorted'},
"status|s:s@" => \$opt{'s'},
"table|t" => \$opt{'table'},
"verbose|v:+" => \$opt{'v'});
if ($opt{'h'}) {
&usage($0);
exit(0);
}
# check if there is input from a pipe or redirection
if (is_interactive) {
&usage($0);
exit(1);
}
# note if HTTP response status is to be used
our $allstatus = 0;
my %status = ();
if ($opt{'s'}) {
for my $s (@{$opt{'s'}}) {
if ($s eq '') {
# show all statuses
$allstatus = 1;
last;
}
# show selected statuses
for my $ss (split(/,/, $s)) {
$status{$ss} = 1;
}
}
} else {
# ignore status
$allstatus = -1;
}
my ($y,$m,$d) = Today(1);
my %p = ();
my %s = ();
# process logs via stdin
while (my $line = <>) {
# ignore known bots
next if (
$line =~ m{api.slack.com/robots} or
$line =~ m{dataforseo.com/dataforseo-bot} or
$line =~ m{www.semrush.com/bot.table} or
$line =~ m{mj12bot.com} or
$line =~ m{opensiteexplorer.org/dotbot} or
$line =~ m{opensiteexplorer.org/dotbot} or
$line =~ m{www.baidu.com/search/spider.table} or
$line =~ m{webmaster.petalsearch.com/site/petalbot} or
$line =~ m{www.apple.com/go/applebot} or
$line =~ m{www.bing.com/bingbot.htm} or
$line =~ m{www.google.com/bot.table} or
$line =~ m{www.scoop.it/bot.table} or
$line =~ m{semantic-visions.com} or
$line =~ m{ahrefs.com/robot/} or
$line =~ m{ClaudeBot} or
$line =~ m{35.204.117.96\s} or
$line =~ m{183.242.45.97\s} or
$line =~ m{49.207.241.7\s} or
$line =~ m{168.138.139.75\s} or
$line =~ m{46.183.221.14\s} or
$line =~ m{/feed}
);
chomp $line;
# my ( $host ) = ( $line =~ m{^(\S+)\s}u );
my ( $date ) = ( $line =~ m{\[([^\]]+)\]} );
my ( $path, $status ) = ( $line =~ m|"GET ([^ ]+)[^"]+" ([0-9]{3})|u );
if (! $path) {
next;
}
my $time = str2time($date);
my ($year,$month,$day, $hour,$minute,$second, $doy,$dow,$dst) =
Time_to_Date($time);
my $dd = Delta_Days( $year,$month,$day, $y,$m,$d);
if ($opt{'v'}>1) {
print "DD=$dd\t( $year,$month,$day, $y,$m,$d)\n";
}
if ($dd < 8 && $dd > 0) {
# one week of data, starting yesterday
$p{$path}++;
$s{$path} = $status; # keep only oldest status for URL path
} elsif ( $opt{'sorted'} && $dd >= 8 ) {
# exit read loop if told that the data was sorted and date exceeded
last;
}
}
if ($opt{'table'}) {
my ($y1, $m1, $d1) = Add_Delta_Days($y, $m, $d, -1);
my ($y2, $m2, $d2) = Add_Delta_Days($y, $m, $d, -7);
my $caption = sprintf("Span from %04d-%02d-%02d to %04d-%02d-%02d",
$y2, $m2, $d2, $y1, $m1, $d1);
&print_table(\%p, \%s, $caption );
} else {
&print_text(\%p, \%s);
}
exit(0);
sub usage {
my ($script) = @_;
$script =~ s|.*/||;
print qq(cat log | $script [options]\n);
print qq(\n);
print qq(Read Apache logs from stdin and count which URLs have been );
print qq(accessed from yesterday until a week ago.\n);
print qq(\n);
print qq( -s, --status [n[,n]...] include HTTP response statuses \n);
print qq( or choose which status(es) to count, if specified\n);
print qq( --sorted log file data is already pre-sorted chronologically\n);
print qq( truncates input after date range\n);
print qq( -t, --table format output as an HTML table\n);
print qq( -h, --help this help text\n);
print qq( -v, --verbose increase notification level verbosity\n);
return(1);
}
sub print_table {
my ( $p, $s, $caption ) = @_;
print qq(<table class="log-stats">\n);
print qq(<caption>$caption</caption>\n);
if ( $allstatus eq 1 ) {
if ($opt{'v'}) {
print "Allstatus\n";
}
foreach my $path (sort{ $p{$b} <=> $p{$a} } keys %p) {
print qq(<tr><td>$p{$path}</td> <td>$s{$path}</td>\t);
print qq(<td><a href="$path">$path</a></td></tr>\n);
}
} elsif ( $allstatus eq 0) {
if ($opt{'v'}) {
print "selected statuses\n";
}
foreach my $path (sort{ $p{$b} <=> $p{$a} } keys %p) {
if ($status{$s{$path}} ) {
print qq(<tr><td>$p{$path}</td> <td>$s{$path}</td>\t);
print qq(<td><a href="$path">$path</a></td></tr>\n);
}
}
} else {
foreach my $path (sort{ $p{$b} <=> $p{$a} } keys %p) {
print qq(<tr><td>$p{$path}</td>\t);
print qq(<td><a href="$path">$path</a></td></tr>\n);
}
}
print qq(</table>\n);
return(1);
}
sub print_text {
my ( $p, $s ) = @_;
if ( $allstatus eq 1 ) {
foreach my $path (sort{ $p{$b} <=> $p{$a} } keys %p) {
print "$p{$path}\t$s{$path}\t$path\n";
}
} elsif ( $allstatus eq 0 ) {
foreach my $path (sort{ $p{$b} <=> $p{$a} } keys %p) {
if ($status{$s{$path}} ) {
print "$p{$path}\t$s{$path}\t$path\n";
}
}
} else {
foreach my $path (sort{ $p{$b} <=> $p{$a} } keys %p) {
print "$p{$path}\t$path\n";
}
}
return(1);
}