#!/usr/bin/env perl

# run with:
# (cd pombe-embl/external_data/disease/
#  ./combine_malacards_data.pl malacards_elite_genes_score_above_100.tsv MalaCards_v480_DOids.tsv > malacards_data_for_chado.tsv)


use warnings;
use strict;

# Combine a MalaCards gene/disease table with a disease-slug -> ID lookup
# table and write the merged rows to STDOUT.
#
# Arguments:
#   $ARGV[0] - tab-separated MalaCards table.  The first four columns are
#              read as: disease name, disease slug, displayed disease name,
#              gene symbol.  Any further columns are ignored.  A row whose
#              first column is 'DiseaseName' is treated as the header and
#              skipped.
#   $ARGV[1] - tab-separated lookup table.  Column 1 is the key (matched
#              against the disease slug) and column 3 is the value
#              (presumably a Disease Ontology ID, judging by the file name
#              in the usage note - confirm against the data).
#
# Output: one tab-separated line per data row:
#   disease name, disease slug, displayed disease name, gene symbol, ID
# The ID column is empty when the slug has no entry in the lookup table.
#
# Dies with a message on a wrong argument count or an unreadable file.
die "usage: $0 <malacards_table.tsv> <doids.tsv>\n" unless @ARGV == 2;

my ($malacard_table_filename, $doids_filename) = @ARGV;

open my $doids_fh, '<', $doids_filename
  or die "can't open $doids_filename for reading: $!\n";

my %doids_map = ();

while (my $line = <$doids_fh>) {
  chomp $line;

  next unless length $line > 0;

  my @bits = split /\t/, $line;

  # A line consisting only of tabs splits to an empty list; a line with
  # fewer than three columns has no ID.  Neither can contribute a mapping.
  next unless defined $bits[0] && defined $bits[2];

  $doids_map{$bits[0]} = $bits[2];
}

close $doids_fh;

open my $malacard_table_fh, '<', $malacard_table_filename
  or die "can't open $malacard_table_filename for reading: $!\n";

while (my $line = <$malacard_table_fh>) {
  chomp $line;

  # Blank lines (e.g. a trailing newline at end of file) carry no data.
  next unless length $line > 0;

  my @fields = split /\t/, $line;

  # split() drops trailing empty fields, so short rows are padded with
  # empty strings rather than left undefined.
  my ($disease_name, $disease_slug, $displayed_disease_name, $gene_symbol) =
    map { defined $fields[$_] ? $fields[$_] : '' } 0 .. 3;

  if ($disease_name eq 'DiseaseName') {
    # ignore the header
    next;
  }

  my $doid = $doids_map{$disease_slug} || '';

  print join("\t", $disease_name, $disease_slug, $displayed_disease_name,
             $gene_symbol, $doid), "\n";
}

close $malacard_table_fh;

