Module Bio::Alignment::Output
In: lib/bio/alignment.rb  (CVS)

Methods

Public Instance methods

Common routine shared by the interleaved and non-interleaved PHYLIP format writers.

[Source]

# File lib/bio/alignment.rb, line 1099
      # Common routine for the interleaved and non-interleaved PHYLIP
      # writers.
      #
      # Recognized keys of +options+ (all optional):
      # :replace_space::   if true, whitespace in names is replaced by '_'
      # :escape::          unless given as false/nil, the characters
      #                    : ; , ( ) in names are replaced by '_'
      # :split::           unless given as false/nil, each name is cut at
      #                    its first whitespace character
      # :avoid_same_name:: unless given as false/nil, names are passed
      #                    through __clustal_avoid_same_name(names, 10)
      # :width::           residues per output line (default 60); rounded
      #                    down to a multiple of 10, but never below 10
      # :gap_char::        string substituted for gaps (default '-')
      # :case::            value whose string form matches /lower/i or
      #                    /upper/i forces the case of the sequences
      #
      # Returns a three-element array:
      # 1. an array holding the single header line " <count> <length>\n"
      # 2. one array of text lines per sequence; the first line starts
      #    with the name padded/truncated to 10 columns, the following
      #    lines start with 10 spaces
      # 3. the number of text lines each sequence was broken into
      def __output_phylip_common(options = {})
        len = self.alignment_length
        aln = [ " #{self.number_of_sequences} #{len}\n" ]
        sn = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
        if options[:replace_space]
          sn.collect! { |x| x.gsub(/\s/, '_') }
        end
        if !options.has_key?(:escape) or options[:escape]
          sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
        end
        if !options.has_key?(:split) or options[:split]
          sn.collect! { |x| x.split(/\s/)[0].to_s }
        end
        if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
          sn = __clustal_avoid_same_name(sn, 10)
        end

        namewidth = 10
        seqwidth  = (options[:width] or 60)
        # Lines are made of whole 10-residue groups.  Keep at least one
        # group: a width below 10 would otherwise round down to 0 and the
        # line count below would divide by zero.
        seqwidth = [ seqwidth.div(10) * 10, 10 ].max
        # Every group of 10 residues is preceded by one space, hence 11
        # characters of text per group.
        seqregexp = Regexp.new("(.{1,#{seqwidth.div(10) * 11}})")
        gchar = (options[:gap_char] or '-')

        aseqs = []
        self.each_seq do |s|
          aseqs << s.to_s.gsub(self.gap_regexp, gchar)
        end
        case options[:case].to_s
        when /lower/i
          aseqs.each { |s| s.downcase! }
        when /upper/i
          aseqs.each { |s| s.upcase! }
        end

        aseqs.collect! do |s|
          snx = sn.shift
          # negative width => left-justified name, then cut to 10 columns
          head = sprintf("%*s", -namewidth, snx.to_s)[0, namewidth]
          head2 = ' ' * namewidth
          # pad short sequences with gap characters up to the alignment length
          s << (gchar * (len - s.length))
          s.gsub!(/(.{1,10})/n, " \\1")
          s.gsub!(seqregexp, "\\1\n")
          a = s.split(/^/)
          head += a.shift
          ret = a.collect { |x| head2 + x }
          ret.unshift(head)
          ret
        end
        lines = (len + seqwidth - 1).div(seqwidth)
        [ aln, aseqs, lines ]
      end

[Source]

# File lib/bio/alignment.rb, line 873
      # Dispatches to the writer for the given +format+ symbol, handing
      # any further arguments to that writer unchanged.
      #
      # Supported formats: :clustal, :fasta, :phylip, :phylipnon, :msf
      # and :molphy.  Any other value raises a RuntimeError.
      def output(format, *arg)
        writer =
          case format
          when :clustal   then :output_clustal
          when :fasta     then :output_fasta
          when :phylip    then :output_phylip
          when :phylipnon then :output_phylipnon
          when :msf       then :output_msf
          when :molphy    then :output_molphy
          end
        raise "Unknown format: #{format.inspect}" if writer.nil?
        __send__(writer, *arg)
      end

Generates ClustalW-formatted text

seqs: sequences (must be an alignment object)
names: names of the sequences
options: options

[Source]

# File lib/bio/alignment.rb, line 1045
      # Formats this alignment by handing itself, its sequence names and
      # +options+ to __clustal_formatter, and returns that result.
      def output_clustal(options = {})
        names = self.sequence_names
        __clustal_formatter(self, names, options)
      end

Generates fasta format text and returns a string.

[Source]

# File lib/bio/alignment.rb, line 1059
      # Builds FASTA text for every sequence yielded by +collect+ and
      # returns it as one string.
      #
      # Recognized keys of +options+:
      # :width::           residues per line (default 70); a value that is
      #                    not greater than 0 disables line wrapping
      # :avoid_same_name:: if true, names come from
      #                    __clustal_avoid_same_name(sequence_names, 30);
      #                    otherwise CR, LF and NUL in each name are
      #                    replaced by a space
      def output_fasta(options={})
        width = options[:width] || 70
        names =
          if options[:avoid_same_name]
            __clustal_avoid_same_name(self.sequence_names, 30)
          else
            self.sequence_names.collect { |k| k.to_s.gsub(/[\r\n\x00]/, ' ') }
          end
        # nil means "do not wrap": each sequence goes on a single line
        wrapper = (width and width > 0) ? Regexp.new(".{1,#{width}}") : nil
        entries = self.collect do |s|
          title = ">#{names.shift}\n"
          residues = wrapper ? s.to_s.gsub(wrapper, "\\0\n") : s.to_s + "\n"
          title + residues
        end
        entries.join('')
      end

Generates Molphy alignment format text as a string

[Source]

# File lib/bio/alignment.rb, line 1151
      # Renders the alignment as Molphy-format text and returns it as a
      # string: a "<count> <length>" header line, then for every
      # sequence its name on one line followed by the wrapped residues.
      #
      # Recognized keys of +options+ (all optional):
      # :replace_space::   if true, whitespace in names is replaced by '_'
      # :escape::          unless given as false/nil, the characters
      #                    : ; , ( ) in names are replaced by '_'
      # :split::           unless given as false/nil, each name is cut at
      #                    its first whitespace character
      # :avoid_same_name:: unless given as false/nil, names are passed
      #                    through __clustal_avoid_same_name(names, 30)
      # :width::           residues per line (default 60)
      # :gap_char::        string substituted for gaps (default '-')
      # :case::            value whose string form matches /lower/i or
      #                    /upper/i forces the case of the sequences
      def output_molphy(options = {})
        len = self.alignment_length
        text = "#{self.number_of_sequences} #{len}\n"

        # options that are switched on unless explicitly given as false/nil
        enabled = lambda { |key| !options.has_key?(key) || options[key] }

        names = self.sequence_names.collect { |x| x.to_s.gsub(/[\r\n\x00]/, ' ') }
        names.collect! { |x| x.gsub(/\s/, '_') } if options[:replace_space]
        names.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') } if enabled.call(:escape)
        names.collect! { |x| x.split(/\s/)[0].to_s } if enabled.call(:split)
        names = __clustal_avoid_same_name(names, 30) if enabled.call(:avoid_same_name)

        seqwidth = options[:width] || 60
        wrapper = Regexp.new("(.{1,#{seqwidth}})")
        gchar = options[:gap_char] || '-'

        rows = []
        self.each_seq { |s| rows << s.to_s.gsub(self.gap_regexp, gchar) }

        case options[:case].to_s
        when /lower/i then rows.collect! { |s| s.downcase }
        when /upper/i then rows.collect! { |s| s.upcase }
        end

        rows.each do |s|
          # pad short sequences with gap characters, then break into lines
          s << (gchar * (len - s.length))
          s.gsub!(wrapper, "\\1\n")
          text << (names.shift + "\n" + s)
        end
        text
      end

Generates msf formatted text as a string

[Source]

# File lib/bio/alignment.rb, line 1193
      # Generates MSF formatted text and returns it as a string.
      #
      # Recognized keys of +options+ (all optional):
      # :avoid_same_name:: unless given as false/nil, names come from
      #                    __clustal_avoid_same_name(sequence_names);
      #                    otherwise CR, LF and NUL are replaced by a space
      # :replace_space::   unless given as false/nil, whitespace in names
      #                    is replaced by '_'
      # :escape::          unless given as false/nil, the characters
      #                    : ; , ( ) in names are replaced by '_'
      # :split::           unless given as false/nil, each name is cut at
      #                    its first whitespace character
      # :gap_char::        string substituted for gaps (default '.')
      # :padding_char::    string used for leading gaps and for filling
      #                    up to the alignment length (default '~')
      # :case::            /lower/i or /upper/i; upper case by default
      # :type::            string form matching /protein/i or /aa/i means
      #                    protein, /na/i means nucleic acid; otherwise
      #                    the type is derived from +seqclass+ or, failing
      #                    that, guessed from the sequence contents
      # :entry_id::        name written to the header line (default is
      #                    built from this object's __id__)
      # :time::            object responding to strftime for the header
      #                    line (default Time.now)
      def output_msf(options = {})
        len = self.seq_length

        if !options.has_key?(:avoid_same_name) or options[:avoid_same_name]
          sn = __clustal_avoid_same_name(self.sequence_names)
        else
          sn = self.sequence_names.collect do |x|
            x.to_s.gsub(/[\r\n\x00]/, ' ')
          end
        end
        if !options.has_key?(:replace_space) or options[:replace_space]
          sn.collect! { |x| x.gsub(/\s/, '_') }
        end
        if !options.has_key?(:escape) or options[:escape]
          sn.collect! { |x| x.gsub(/[\:\;\,\(\)]/, '_') }
        end
        if !options.has_key?(:split) or options[:split]
          sn.collect! { |x| x.split(/\s/)[0].to_s }
        end

        seqwidth = 50
        namewidth = [31, sn.collect { |x| x.length }.max ].min
        sep = ' ' * 2

        seqregexp = Regexp.new("(.{1,#{seqwidth}})")
        gchar = (options[:gap_char]  or '.')
        pchar = (options[:padding_char] or '~')

        aseqs = []
        self.each_seq do |s|
          aseqs << s.to_s.gsub(self.gap_regexp, gchar)
        end
        aseqs.each do |s|
          # leading gaps become padding, trailing gaps are dropped and
          # the sequence is then filled with padding up to +len+
          s.sub!(/\A#{Regexp.escape(gchar)}+/) { |x| pchar * x.length }
          s.sub!(/#{Regexp.escape(gchar)}+\z/, '')
          s << (pchar * (len - s.length))
        end

        case options[:case].to_s
        when /lower/i
          aseqs.each { |s| s.downcase! }
        when /upper/i
          aseqs.each { |s| s.upcase! }
        else #default upcase
          aseqs.each { |s| s.upcase! }
        end

        case options[:type].to_s
        when /protein/i, /aa/i
          amino = true
        when /na/i
          amino = false
        else
          if seqclass == Bio::Sequence::AA then
            amino = true
          elsif seqclass == Bio::Sequence::NA then
            amino = false
          else
            # If we can't determine the type, we assume protein unless
            # every sequence consists solely of a/c/g/t once the gap and
            # padding characters inserted above are ignored.
            filler = Regexp.union(gchar, pchar)
            amino = aseqs.size
            aseqs.each do |x|
              amino -= 1 if /\A[acgt]*\z/i =~ x.gsub(filler, '')
            end
            amino = false if amino <= 0
          end
        end

        seq_type = (amino ? 'P' : 'N')

        fn = (options[:entry_id] or self.__id__.abs.to_s + '.msf')
        dt = (options[:time] or Time.now).strftime('%B %d, %Y %H:%M')

        sums = aseqs.collect { |s| GCG::Seq.calc_checksum(s) }
        sum = 0; sums.each { |x| sum += x }; sum %= 10000
        msf =
          [
           "#{seq_type == 'N' ? 'N' : 'A' }A_MULTIPLE_ALIGNMENT 1.0\n",
           "\n",
           "\n",
           " #{fn}  MSF: #{len}  Type: #{seq_type}  #{dt}  Check: #{sum} ..\n",
           "\n"
          ]

        sn.each do |snx|
          # negative width => left-justified name in the Name: lines
          msf << ' Name: ' +
            sprintf('%*s', -namewidth, snx.to_s)[0, namewidth] +
            "  Len: #{len}  Check: #{sums.shift}  Weight: 1.00\n"
        end
        msf << "\n//\n"

        aseqs.collect! do |s|
          snx = sn.shift
          # positive width => right-justified name in the sequence rows
          head = sprintf("%*s", namewidth, snx.to_s)[0, namewidth] + sep
          s.gsub!(seqregexp, "\\1\n")
          a = s.split(/^/)
          a.collect { |x| head + x }
        end
        lines = (len + seqwidth - 1).div(seqwidth)
        i = 1
        lines.times do
          msf << "\n"
          # ruler line: first and last column number of this block
          n_l = i
          n_r = [ i + seqwidth - 1, len ].min
          if n_l != n_r then
            w = [ n_r - n_l + 1 - n_l.to_s.length - n_r.to_s.length, 1 ].max
            msf << (' ' * namewidth + sep + n_l.to_s +
                    ' ' * w + n_r.to_s + "\n")
          else
            msf << (' ' * namewidth + sep + n_l.to_s + "\n")
          end
          aseqs.each { |a| msf << a.shift }
          i += seqwidth
        end
        msf << "\n"
        msf.join('')
      end

Generates PHYLIP interleaved alignment format text as a string.

[Source]

# File lib/bio/alignment.rb, line 1082
      # Builds interleaved PHYLIP text: after the header, the first line
      # of every sequence, a blank line, the second line of every
      # sequence, and so on.  The blank line after the final block is
      # dropped.  +options+ is handed to __output_phylip_common unchanged.
      def output_phylip(options = {})
        pieces, per_sequence, block_count = __output_phylip_common(options)
        block_count.times do
          per_sequence.each { |rows| pieces.push(rows.shift) }
          pieces.push("\n")
        end
        pieces.pop if pieces.last == "\n"
        pieces.join('')
      end

Generates PHYLIP 3.2 (old) non-interleaved format text as a string.

[Source]

# File lib/bio/alignment.rb, line 1093
      # Builds non-interleaved PHYLIP text: the header line followed by
      # all lines of the first sequence, then all lines of the second,
      # and so on.  +options+ is handed to __output_phylip_common
      # unchanged; the line count it returns is not needed here.
      def output_phylipnon(options = {})
        header, per_sequence = __output_phylip_common(options)
        header.first + per_sequence.flatten.join('')
      end

to_clustal is deprecated. Instead, please use output_clustal.

[Source]

# File lib/bio/alignment.rb, line 1053
      # Deprecated entry point: emits a deprecation warning and then
      # returns whatever output_clustal returns for the same arguments.
      def to_clustal(*arg)
        warn('to_clustal is deprecated. Please use output_clustal.')
        output_clustal(*arg)
      end

[Validate]