#!/usr/bin/env perl

use strict;
use warnings;
use Carp;

# Location of the local go-site checkout and of the GO_REF metadata files
# inside it.
my $go_site_path = '/var/pomcur/sources/go-site';
my $gorefs_path = $go_site_path . '/metadata/gorefs';

# This script's name with any leading directory components removed.
my $script_name = $0 =~ s{.*/}{}r;

# Header block: "!"-prefixed lines recording how the output was generated,
# followed by one blank line.
print <<"END_HEADER";
! output from $script_name
! input files: $gorefs_path/*
! recreate by running:
!   $script_name > go_references.txt
! in pombe-embl/supporting_files/
! after "git pull" in $go_site_path

END_HEADER


# Read every goref-NNN.md file in $gorefs_path and print one record per
# non-obsolete reference.  A record is a set of "key: value" lines (ref_id,
# then title, year, authors and abstract when present) followed by a blank
# line.
#
# File names are sorted so that the output is the same on every run;
# readdir returns entries in no particular order.
opendir my $dh, $gorefs_path
  or die "can't open gorefs directory: $!\n";

my @go_ref_files = sort grep { /^goref-\d+\.md$/ } readdir $dh;

closedir $dh;

GO_REF_FILE:
for my $go_ref_file (@go_ref_files) {
  open my $ref_fh, '<', "$gorefs_path/$go_ref_file"
    or die "can't open $go_ref_file: $!\n";

  my $id = undef;
  my $authors = undef;
  my $year = undef;
  my $title = undef;
  my @abstract_lines = ();
  my $is_obsolete = 0;

  while (defined (my $line = <$ref_fh>)) {
    # Strip the line ending once, up front, so no captured or accumulated
    # text carries a stray newline into the output.
    chomp $line;

    if ($line =~ /^id: "?(.*?)"?$/) {
      $id = $1;
      next;
    }

    if ($line =~ /is_obsolete:\s*true/i) {
      $is_obsolete = 1;
      last;
    }

    if ($line =~ /^authors: (.*)/) {
      $authors = $1;
      next;
    }
    if ($line =~ /^year: (.*)/) {
      $year = $1;
      next;
    }
    if ($line =~ /^#+\s*(.*)/) {
      # A line starting with "#" is taken as a heading; each heading seen
      # replaces any earlier title.
      $title = $1;

      if ($title =~ /OBSOLETE/) {
        $is_obsolete = 1;
        last;
      }

      next;
    }

    # Once a heading has been seen, every remaining line that contains a
    # word character is part of the abstract; blank lines are dropped.
    if (defined $title && $line =~ /\w+/) {
      push @abstract_lines, $line;
    }
  }

  close $ref_fh;

  # Obsolete references are left out of the output entirely.
  next GO_REF_FILE if $is_obsolete;

  if (!defined $id) {
    # Without an id the record would be printed as "ref_id: " with an
    # "uninitialized value" warning, so report the file and skip it.
    warn "no id found in $go_ref_file - skipping\n";
    next GO_REF_FILE;
  }

  # NOTE(review): abstract lines are joined with newlines, following the
  # "\n" separator used by the original code - confirm that readers of the
  # output accept a multi-line abstract value.
  my $abstract = join "\n", @abstract_lines;

  print "ref_id: $id\n";
  print "title: $title\n" if $title;
  print "year: $year\n" if $year;
  print "authors: $authors\n" if $authors;
  print "abstract: $abstract\n" if $abstract;

  print "\n";
}
