br_bioflat.rb

Path: bin/br_bioflat.rb  (CVS)
Last Update: Fri Apr 06 08:35:39 +0900 2007

bioflat - OBDA flat file indexer (executable)

Copyright: Copyright (C) 2002 Naohisa Goto <ng@bioruby.org>
License: The Ruby License

$Id: br_bioflat.rb,v 1.17 2007/04/05 23:35:39 trevor Exp $

Required files

bio  

Methods

Public Instance methods

[Source]

# File bin/br_bioflat.rb, line 69
# Creates or updates a flat file index from the command line held in ARGV.
#
# ARGV[0] selects the invocation style:
# --makeindexBDB DIR/DBNAME FILES:: backward compatible form, BDB index
# --makeindex DIR/DBNAME FILES::    backward compatible form, flat index
# --create / --update::             followed by options, then the files
#
# Options are read until the first argument that does not start with "-",
# or until "--files". Whatever remains is passed on as the list of files.
#
# mode:: +:update+ calls Bio::FlatFileIndex.update_index; any other value
#        calls Bio::FlatFileIndex.makeindex.
#
# Prints the usage message and returns nil without indexing when ARGV[0] is
# not recognized, when the index type is unknown, or when the database path
# cannot be determined (no DIR/DBNAME and no --location/--dbname pair).
# Otherwise returns whatever the Bio::FlatFileIndex call returns.
def do_index(mode = :create)
  dbpath = dbname = location = format = is_bdb = nil

  case ARGV[0]
  # The BDB pattern has to be tested first, because /^--?make/ matches
  # "--makeindexBDB" as well.
  when /^\-\-?make.*bdb/i
    dbpath = ARGV[1]
    args = ARGV[2..-1]
    is_bdb = Bio::FlatFileIndex::MAGIC_BDB
  when /^\-\-?make/
    dbpath = ARGV[1]
    args = ARGV[2..-1]
  when /^\-\-create/, /^\-\-update/
    args = ARGV[1..-1]
  else
    usage
    return
  end
  # ARGV[2..-1] is nil when ARGV holds fewer than two elements.
  args ||= []

  options = {}

  while args.first =~ /^\-/
    case x = args.shift

    # BioRuby extension: "--format=CLASS" names the format explicitly.
    # It has to be tested before the bare "--format" pattern below, which
    # would otherwise match it and swallow the following argument.
    when /^\-\-?format\=(.*)/i
      format = $1

    # OBDA stuff

    when /^\-\-?format/
      # The OBDA format name is read and discarded on purpose so that the
      # file format gets auto-detected.
      args.shift
      format = nil
    when /^\-\-?location/
      value = args.shift
      location = value.chomp('/') if value
    when /^\-\-?dbname/
      dbname = args.shift
    when /^\-\-?(index)?type/
      indextype = args.shift
      case indextype
      when /bdb/
        is_bdb = Bio::FlatFileIndex::MAGIC_BDB
      when /flat/
        is_bdb = nil
      else
        # Unknown (or missing) index type: do not build anything.
        usage
        return
      end

    # BioRuby extension

    when /^\-\-?files/i
      break

    when /^\-\-?sort\=(.*)/i
      options['sort_program'] = $1
      options['onmemory'] = nil
    when /^\-\-?no\-?te?mp/i
      options['onmemory'] = true

    when /^\-\-?primary.*\=(.*)/i
      options['primary_namespace'] = $1

    when /^\-\-?add-secondary.*\=(.*)/i
      options['additional_secondary_namespaces'] ||= []
      options['additional_secondary_namespaces'] << $1 if $1.length > 0

    when /^\-\-?secondary.*\=(.*)/i
      options['secondary_namespaces'] ||= []
      options['secondary_namespaces'] << $1 if $1.length > 0

    when /^\-\-?renew/
      options['renew'] = true

    else
      $stderr.print "Warning: ignoring invalid option #{x.inspect}\n"
    end
  end

  unless dbpath
    # Without DIR/DBNAME both --location and --dbname are required.
    unless location && dbname
      usage
      return
    end
    dbpath = File.join(location, dbname)
  end

  if mode == :update
    Bio::FlatFileIndex.update_index(dbpath, format, options, *args)
  else
    Bio::FlatFileIndex.makeindex(is_bdb, dbpath, format, options, *args)
  end
end

[Source]

# File bin/br_bioflat.rb, line 156
# Searches an existing flat file index for the keywords given in ARGV and
# prints every entry found to standard output. Progress and error messages
# go to standard error.
#
# Options consumed from the front of ARGV:
# --search::                  accepted and ignored
# --location DIR::            directory that contains the database
# --dbname DBNAME::           name of the database
# --namespace NAME, --name NAME, --namespace=NAME::
#                             restrict the search to NAME; may be repeated
#
# Option parsing stops at the first argument that matches none of the
# above. When no --dbname was given, that argument is used as the database
# name. All remaining arguments are the keywords to search for.
#
# A RuntimeError raised while looking up a keyword is reported and the
# search goes on with the next keyword. The database is closed even when
# another kind of exception aborts the search.
def do_search
  dbname = nil
  location = nil
  names = []
  while x = ARGV.shift
    case x
    when /\A\-\-?search/i
      # mode selector only; nothing to record
    when /\A\-\-?location/i
      location = ARGV.shift.to_s.chomp('/')
    when /\A\-\-?dbname/i
      dbname = ARGV.shift
    when /\A\-\-?name(?:space)?(?:\=(.+))?/i
      # "--namespace=NAME" carries the value itself; otherwise the value
      # is the next argument.
      name = $1 || ARGV.shift
      names << name if name
    else
      ARGV.unshift x
      break
    end
  end
  dbname ||= ARGV.shift
  dbname = File.join(location, dbname) unless location.to_s.empty?
  db = Bio::FlatFileIndex.open(dbname)
  begin
    ARGV.each do |key|
      $stderr.print "Searching for \'#{key}\'...\n"
      begin
        if names.empty?
          r = db.include?(key)
        else
          r = db.include_in_namespaces?(key, *names)
        end
      rescue RuntimeError
        $stderr.print "ERROR: #{$!}\n"
        next
      end
      r ||= []
      $stderr.print "OK, #{r.size} entry found\n"
      r.each do |i|
        print db.search_primary(i)
      end
    end
  ensure
    db.close
  end
end

[Source]

# File bin/br_bioflat.rb, line 209
# Prints the namespaces of a flat file format or of an existing index,
# one per line. When a format is known the primary namespace comes first,
# followed by the secondary ones.
#
# Options consumed from the front of ARGV:
# --show-namespaces::          accepted and ignored
# --location DIR::             directory that contains the database
# --dbname DBNAME::            name of the database
# --format CLASS, --format=CLASS:: report the namespaces of this format
# --files FILE...::            detect the format from the given files
#
# Without a format the namespaces are read from the database, whose name
# is taken from --dbname or, failing that, from the next argument.
def do_show_namespaces
  db_name = nil
  db_dir = nil
  sample_files = nil
  fmt = nil

  while (arg = ARGV.shift)
    case arg
    when /\A\-\-?(show\-)?name(space)?s/i
      # mode selector only; nothing to record
    when /\A\-\-?location/i
      db_dir = ARGV.shift.to_s.chomp('/')
    when /\A\-\-?dbname/i
      db_name = ARGV.shift
    when /\A\-\-?format(?:\=(.+))?/i
      # The value is either attached with "=" or is the next argument;
      # a missing value leaves any earlier setting untouched.
      value = $1 || ARGV.shift
      fmt = value if value
    when /\A\-\-?files/i
      # Everything after --files is a file name.
      sample_files = ARGV
      break
    else
      ARGV.unshift arg
      break
    end
  end

  if sample_files
    # Use the first file whose format can be detected.
    detected = nil
    sample_files.find { |path| detected = Bio::FlatFile.autodetect_file(path) }
    unless detected
      $stderr.print "ERROR: couldn't determine file format\n"
      return
    end
    $stderr.print "Format: #{detected}\n"
    fmt = detected
  end

  $stderr.print "Namespaces: (first line: primary namespace)\n"

  unless fmt
    db_name ||= ARGV.shift
    db_name = File.join(db_dir, db_name) unless db_dir.to_s.empty?
    db = Bio::FlatFileIndex.open(db_name)
    puts db.namespaces
    return db.close
  end

  parser = Bio::FlatFileIndex::Indexer::Parser.new(fmt)
  print parser.primary.name, "\n"
  puts parser.secondary.keys
end

[Source]

# File bin/br_bioflat.rb, line 14
# Prints the command line help to standard output and returns nil.
# The name of the running program ($0) is inserted into every synopsis line.
# This method does not terminate the program.
def usage
  print <<EOM
Search:
  #{$0} [--search] [options...] [DIR/]DBNAME KEYWORDS
or
  #{$0} [--search] --location DIR --dbname DBNAME [options...] KEYWORDS

Search options:
  --namespace NAME       set search namespace to NAME
  (or --name NAME)         You can set this option many times to specify
                           more than one namespace.

Create index:
  #{$0} --create --location DIR --dbname DBNAME [--format <genbank|embl|fasta>] [options...] [--files] FILES
Update index:
  #{$0} --update --location DIR --dbname DBNAME [options...] [--files] FILES

Create index options:
  --primary=UNIQUE       set primary namespace to UNIQUE
                           Default primary/secondary namespaces depend on
                           each format of flatfiles.
  --secondary=KEY        set secondary namespaces.
                           You may use this option many times to specify
                           more than one namespace.
  --add-secondary=KEY    add secondary namespaces to default specification.
                           You can use this option many times.

Options only valid for --create (or --update) --type flat:
  --sort=/path/to/sort   use external sort program (e.g. /usr/bin/sort)
  --sort=BUILTIN         use builtin sort routine

Options only valid for --update:
  --renew                re-read all flatfiles and update whole index

Backward compatibility:
  --makeindex DIR/DBNAME
      same as --create --type flat --location DIR --dbname DBNAME
  --makeindexBDB DIR/DBNAME
      same as --create --type bdb  --location DIR --dbname DBNAME
  --format=CLASS
      instead of genbank|embl|fasta, specifying a class name is allowed

Show namespaces:
  #{$0} --show-namespaces [--location DIR --dbname DBNAME] [DIR/DBNAME]
or
  #{$0} --show-namespaces [--format=CLASS]
or
  #{$0} --show-namespaces --files file

EOM
end

[Validate]