// NEWSARC: News de-archiver
//   Copyright 1998 Sean Barrett
//
// Takes a file containing multiple articles written
// out with [t]rn in non-mailbox format, and extracts
// them into HTML files suitable for display on the web.
// Also removes many headers

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// Capacity, in bytes, of the line buffer and of the saved header values
// declared below.
#define BUFSIZE       4096
// The line currently being processed.  getline() fills it, and
// article_first_line() and is_empty() examine it directly.
char buffer[BUFSIZE];

// more globals... this would have been a lot cleaner
// if ALL the files were globals

// The index page: opened in main(), one entry written per article
// by copy_article().
FILE *index_file;
// Text of the current article's "Subject: " and "From: " headers.
// copy_article() resets both to placeholders before each article and
// process_line() overwrites them when the matching header is seen.
char subject[BUFSIZE];
char from[BUFSIZE];

// Reads the next line of `f` into the global `buffer`: at most BUFSIZE-1
// characters, with the newline kept when it fits.
//
// If nothing could be read (end of file or a read error), `buffer` is set
// to the empty string.  After a read error the C library leaves the
// contents of the array indeterminate, so the result of fgets() has to be
// checked rather than relying on a value stored beforehand.
//
// NOTE(review): this function has the same name as POSIX getline(), which
// <stdio.h> declares with a different signature when POSIX.1-2008
// features are enabled; building in such a mode needs the function
// renamed together with its callers.
void getline(FILE *f)
{
   if (fgets(buffer, BUFSIZE, f) == NULL)
      buffer[0] = 0;
}

// Tests whether the global `buffer` holds the banner line that starts an
// article, i.e. whether it scans as "Article <integer> of <word>".
// Returns 1 if both the number and the word were found, 0 otherwise.
int article_first_line(void)
{
   char word[BUFSIZE];   // receives the token after "of"; only its presence matters
   int number;
   int fields = sscanf(buffer, "Article %d of %s", &number, word);

   return fields == 2;
}

// Returns 1 if the global `buffer` contains nothing but whitespace
// (including the case of an empty string), 0 otherwise.
int is_empty(void)
{
   const char *s = buffer;

   // The cast matters: isspace() is only defined for values representable
   // as unsigned char (or EOF), and plain char may be negative for
   // 8-bit text.
   while (*s && isspace((unsigned char) *s))
      ++s;
   return *s == 0;
}

// Prefixes of the header lines that process_line() leaves out of the
// generated page.  Each entry is compared against the start of a line
// with strncmp(), so the match is case-sensitive and covers the ": "
// that follows the header name.
// NOTE(review): a header written with other capitalisation (for example
// "Reply-To: ") would not match its entry -- confirm this is intended.
char *trim_headers[] =
{
   // routing
   "Xref: ",
   "Path: ",

   // reply and distribution control
   "Reply-to: ",
   "Followup-to: ",
   "Distribution: ",

   // posting host
   "NNTP-Posting-Host: ",

   // MIME bookkeeping
   "Mime-Version: ",
   "Content-Type: ",
   "Content-Transfer-Encoding: ",
};

// Writes the string `str` to `g` as HTML text: '<', '>' and '&' are
// replaced by "&lt;", "&gt;" and "&amp;", and every other character is
// written through unchanged.
void copy_line(char *str, FILE *g)
{
   char *p = str;

   while (*p) {
      char c = *p++;

      if (c == '<')
         fputs("&lt;", g);
      else if (c == '>')
         fputs("&gt;", g);
      else if (c == '&')
         fputs("&amp;", g);
      else
         fputc(c, g);
   }
}

// Copies a header value into `dst`, which has room for `dst_size` bytes
// including the terminator, and drops one trailing newline if present.
// An over-long value is truncated; an empty value yields an empty string.
static void store_header_value(char *dst, size_t dst_size, const char *value)
{
   size_t len = strlen(value);

   if (len >= dst_size)
      len = dst_size - 1;
   memmove(dst, value, len);   // memmove: stays defined even if value points into dst
   // The length check keeps an empty value from being indexed at -1.
   if (len > 0 && dst[len-1] == '\n')
      --len;
   dst[len] = 0;
}

// Writes one line of an article to `g`, HTML-escaped via copy_line().
//
//   str        the line to write
//   g          the output stream
//   in_header  in/out flag; non-zero while the article's headers are
//              still being read
//
// While *in_header is set:
//   - a blank line ends the header: the flag is cleared and a single
//     newline is written;
//   - the "Article N of ..." banner and every header listed in
//     trim_headers are dropped;
//   - the values of "Subject: " and "From: " are remembered in the
//     globals `subject` and `from` (the lines themselves are still
//     written).
//
// NOTE: the blank-line and banner tests are made by is_empty() and
// article_first_line(), which look at the global `buffer` rather than at
// `str`.
void process_line(char *str, FILE *g, int *in_header)
{
   if (*in_header) {
      size_t i;

      if (is_empty()) {
         *in_header = 0;
         fprintf(g, "\n");
         return;
      }

      if (article_first_line())
         return;

      for (i=0; i < sizeof(trim_headers)/sizeof(trim_headers[0]); ++i) {
         if (strncmp(trim_headers[i], str, strlen(trim_headers[i])) == 0)
            return;
      }
      if (strncmp(str, "Subject: ", 9) == 0)
         store_header_value(subject, sizeof subject, str+9);
      else if (strncmp(str, "From: ", 6) == 0)
         store_header_value(from, sizeof from, str+6);
   }

   copy_line(str, g);
}

// Markup written at the top of every article page; the article text that
// follows it ends up inside the <PRE> element.
char *ARTICLE_HEADER =
     "<HTML><HEAD></HEAD>"
     "<BODY><PRE>\n";

// Markup written after the article text to close the page.
char *ARTICLE_TAIL =
     "</PRE></BODY>"
     "</HTML>\n";

// The index head starts with two spaces and the tail with none.  The
// leading spaces mean the index file can be run through 'sort' and the
// head still comes out at the top and the tail at the bottom.
char *INDEX_HEAD =
     "  <HTML>"
     "<HEAD></HEAD>"
     "<BODY>"
     "<ul>\n";
char *INDEX_TAIL =
     "</ul>"
     "</BODY>"
     "</HTML>";

// Writes one article to "<output_dir>/newsNNNN.html" and adds an entry
// for it to the index file.
//
//   f           the archive being read; on entry the global `buffer`
//               already holds the next unread line
//   output_dir  directory that receives the HTML file
//   n           article number, used in the file name and the index link
//
// Blank lines in front of the article are skipped.  Lines are then copied
// until the next "Article N of ..." banner is read, or until the input
// ends or fails.  Nothing is written if the input ends (or fails) before
// an article starts.  The program exits with status 1 if the output file
// cannot be named or opened.
void copy_article(FILE *f, char *output_dir, int n)
{
   char filename[256];
   FILE *g;
   int in_header;
   int len;

   // skip whitespace; also stop on a read error, which would otherwise
   // keep this loop spinning because feof() never becomes true
   while (is_empty() && !feof(f) && !ferror(f)) {
      getline(f);
   }

   if (feof(f) || ferror(f)) return;

   // snprintf() keeps a long output_dir from overrunning filename[]
   len = snprintf(filename, sizeof filename, "%s/news%04d.html", output_dir, n);
   if (len < 0 || (size_t) len >= sizeof filename) {
      fprintf(stderr, "Output file name too long in directory '%s'\n", output_dir);
      exit(1);
   }

   g = fopen(filename, "w");
   if (!g) {
      fprintf(stderr, "Failed to open output file '%s'\n", filename);
      exit(1);
   }

   fputs(ARTICLE_HEADER, g);

   in_header = 1;

   // placeholders shown in the index when the article lacks the header
   strcpy(subject, "<no Subject:>");
   strcpy(from, "<no From:>");

   do {
      process_line(buffer, g, &in_header);
      getline(f);
   } while (!feof(f) && !ferror(f) && !article_first_line());

   fputs(ARTICLE_TAIL, g);
   if (fclose(g) != 0)
      fprintf(stderr, "Error writing output file '%s'\n", filename);

   // The index entry is written with copy_line() directly.  Going through
   // process_line() is only equivalent once in_header has been cleared;
   // for an article with no blank line after its headers the flag is
   // still set, and the header logic (which inspects `buffer`) could
   // drop the text.
   fprintf(index_file, " <li>");
   copy_line(from, index_file);
   fprintf(index_file, ": <a href=\"news%04d.html\">", n);
   copy_line(subject, index_file);
   fprintf(index_file, "</a>\n");
}

// Entry point: newsarc archive-file [output-directory]
//
// Reads the archive named by argv[1] and writes one HTML page per article
// plus "index.html" into the output directory (the current directory when
// none is given).
//
// Exit status:
//   0  success
//   1  wrong number of arguments
//   2  the archive could not be opened
//   3  the index file could not be named or opened
//   4  reading the archive or writing the index failed
int main(int argc, char **argv)
{
   FILE *f;
   char *output_dir;
   char index_name[256];
   int n;
   int len;
   int status = 0;

   if (argc < 2 || argc > 3) {
      fprintf(stderr, "Usage: newsarc archive-file [output-directory]\n");
      return 1;
   }

   output_dir = argc == 3 ? argv[2] : ".";

   f = fopen(argv[1], "r");
   if (!f) {
      // the file name must be passed to match the '%s' in the message
      fprintf(stderr, "Couldn't open input file '%s'\n", argv[1]);
      return 2;
   }

   // snprintf() keeps a long directory name from overrunning index_name[]
   len = snprintf(index_name, sizeof index_name, "%s/index.html", output_dir);
   if (len < 0 || (size_t) len >= sizeof index_name) {
      fprintf(stderr, "Output directory name '%s' is too long\n", output_dir);
      fclose(f);
      return 3;
   }

   index_file = fopen(index_name, "w");
   if (!index_file) {
      fprintf(stderr, "Failed to open output file '%s'\n", index_name);
      fclose(f);
      return 3;
   }
   fputs(INDEX_HEAD, index_file);

   // prime `buffer` with the first line, then copy articles until the
   // input ends; a read error also ends the loop, since feof() alone
   // would never become true in that case
   getline(f);
   for (n=1; !feof(f) && !ferror(f); ++n) {
      copy_article(f, output_dir, n);
   }

   if (ferror(f)) {
      fprintf(stderr, "Error reading input file '%s'\n", argv[1]);
      status = 4;
   }

   fputs(INDEX_TAIL, index_file);
   if (fclose(index_file) != 0) {
      fprintf(stderr, "Error writing output file '%s'\n", index_name);
      status = 4;
   }
   fclose(f);
   return status;
}
