| Class | Bio::SPTR |
| In: |
lib/bio/db/embl/sptr.rb
(CVS)
|
| Parent: | EMBLDB |
| dr | -> | embl_dr |
| Backup Bio::EMBLDB#dr as embl_dr | ||
returns contents in the CC lines.
returns an object of contents in the TOPIC.
returns contents of the "ALTERNATIVE PRODUCTS".
{'Event' => str,
'Named isoforms' => int,
'Comment' => str,
'Variants'=>[{'Name' => str, 'Synonyms' => str, 'IsoId' => str, 'Sequence' => []}]}
CC -!- ALTERNATIVE PRODUCTS:
CC Event=Alternative splicing; Named isoforms=15;
...
CC placentae isoforms. All tissues differentially splice exon 13;
CC Name=A; Synonyms=no del;
CC IsoId=P15529-1; Sequence=Displayed;
returns contents of the "DATABASE".
[{'NAME'=>str,'NOTE'=>str, 'WWW'=>URI,'FTP'=>URI}, ...]
CC -!- DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
returns contents of the "MASS SPECTROMETRY".
[{'MW'=>float,'MW_ERR'=>float, 'METHOD'=>str,'RANGE'=>str}, ...]
CC -!- MASS SPECTROMETRY: MW=XXX[; MW_ERR=XX][; METHOD=XX][;RANGE=XX-XX].
CC -!- TISSUE SPECIFICITY: HIGHEST LEVELS FOUND IN TESTIS. ALSO PRESENT CC IN LIVER, KIDNEY, LUNG AND BRAIN. CC -!- TOPIC: FIRST LINE OF A COMMENT BLOCK; CC SECOND AND SUBSEQUENT LINES OF A COMMENT BLOCK.
See also www.expasy.org/sprot/userman.html#CC_line
# File lib/bio/db/embl/sptr.rb, line 613
#
# Returns the contents of the CC (comment) lines.
#
# On the first successful parse the CC text is split on the "-!- " topic
# delimiter and cached in @data['CC'] as { TOPIC => [body, ...] }.
#
# topic:: nil to get the whole Hash, or a topic name. Some topics are
#         post-processed by a cc_* helper, some are returned as one joined
#         String, 'DATABASE' is parsed into an Array of Hashes, and every
#         other topic returns the raw Array of bodies (nil when absent).
#
# Returns an empty Hash (whatever the topic) when the entry has no CC lines
# or only a copyright statement. Raises RuntimeError when a CC block does
# not look like "TOPIC: text".
def cc(topic = nil)
  unless @data['CC']
    cc = Hash.new
    comment_border = '-' * (77 - 4 + 1)
    dlm = /-!- /

    # 12KD_MYCSM has no CC lines.
    return cc if get('CC').size == 0

    cc_raw = fetch('CC')

    # Removing the copyright statement.
    cc_raw.sub!(/ *---.+---/m, '')

    # Not any CC Lines without the copyright statement.
    return cc if cc_raw == ''

    begin
      # Keep only the text in front of the dashed border line.
      cc_raw = cc_raw.split(/#{comment_border}/)[0]
      # Drop the first delimiter so that split does not yield an empty head.
      cc_raw = cc_raw.sub(dlm, '')
      cc_raw.split(dlm).each do |tmp|
        tmp = tmp.strip

        if /(^[A-Z ]+[A-Z]): (.+)/ =~ tmp
          key  = $1
          body = $2
          # Re-join words that were hyphenated across a line break.
          body.gsub!(/- (?!AND)/, '-')
          body.strip!
          (cc[key] ||= []).push(body)
        else
          raise ["Error: [#{entry_id}]: CC Lines", '"', tmp, '"',
                 '', get('CC'), ''].join("\n")
        end
      end
    rescue NameError
      # NoMethodError is a NameError, so this clause also covers the case
      # where cc_raw became nil above.
      if fetch('CC') == ''
        return {}
      else
        raise ["Error: Invalid CC Lines: [#{entry_id}]: ",
               "\n'#{get('CC')}'\n", "(#{$!})"].join
      end
    end

    @data['CC'] = cc
  end

  bodies = @data['CC'][topic]

  case topic
  when nil
    return @data['CC']
  when 'ALTERNATIVE PRODUCTS'
    return cc_alternative_products(bodies)
  when 'BIOPHYSICOCHEMICAL PROPERTIES'
    return cc_biophysiochemical_properties(bodies)
  when 'CATALITIC ACTIVITY'
    # NOTE(review): 'CATALITIC' looks like a misspelling of 'CATALYTIC'; as
    # written, the correctly spelled topic takes the raw-Array branch below.
    # Left unchanged because cc_catalytic_activity is not visible here.
    return cc_catalytic_activity(bodies)
  when 'CAUTION'
    return cc_caution(bodies)
  when 'DEVELOPMENTAL STAGE', 'DISEASE', 'ENZYME REGULATION',
       'FUNCTION', 'INDUCTION'
    # Array#to_s only concatenates on Ruby 1.8; join gives the same text on
    # every version, and Array(nil) keeps the '' result for absent topics.
    return Array(bodies).join
  when 'INTERACTION'
    return cc_interaction(bodies)
  when 'MASS SPECTROMETRY'
    return cc_mass_spectrometry(bodies)
  when 'PATHWAY'
    return cc_pathway(bodies)
  when 'RNA EDITING'
    return cc_rna_editing(bodies)
  when 'SUBCELLULAR LOCATION'
    return cc_subcellular_location(bodies)
  when 'WEB RESOURCE'
    return cc_web_resource(bodies)
  when 'DATABASE'
    # DATABASE: NAME=Text[; NOTE=Text][; WWW="Address"][; FTP="Address"].
    return bodies unless bodies
    parsed = []
    bodies.each do |e|
      record = {'NAME' => nil, 'NOTE' => nil, 'WWW' => nil, 'FTP' => nil}
      # Drop the final character (the closing '.') before splitting fields.
      e.sub(/.$/, '').split(/;/).each do |line|
        case line
        when /NAME=(.+)/
          record['NAME'] = $1
        when /NOTE=(.+)/
          record['NOTE'] = $1
        when /WWW="(.+)"/
          record['WWW'] = $1
        when /FTP="(.+)"/
          record['FTP'] = $1
        end
      end
      parsed.push(record)
    end
    return parsed
  else
    # ALLERGEN, BIOTECHNOLOGY, COFACTOR, DOMAIN, MISCELLANEOUS,
    # PHARMACEUTICAL, POLYMORPHISM, PTM, SIMILARITY, SUBUNIT,
    # TISSUE SPECIFICITY, TOXIC DOSE and any unknown topic.
    return bodies
  end
end
CC -!- WEB RESOURCE: NAME=ResourceName[; NOTE=FreeText][; URL=WWWAddress].
# File lib/bio/db/embl/sptr.rb, line 925
#
# Parses the bodies of the "WEB RESOURCE" topic.
#
# data:: Array of Strings of the form
#        NAME=ResourceName[; NOTE=FreeText][; URL="WWWAddress"],
#        or nil when the entry has no such topic.
#
# Returns an Array of {'NAME' => str, 'NOTE' => str, 'URL' => str} Hashes;
# fields that are missing stay nil. Returns nil when data is nil, instead
# of raising NoMethodError.
def cc_web_resource(data)
  return data unless data

  data.map do |body|
    entry = {'NAME' => nil, 'NOTE' => nil, 'URL' => nil}
    body.split(';').each do |field|
      case field
      when /NAME=(.+)/
        entry['NAME'] = $1.strip
      when /NOTE=(.+)/
        entry['NOTE'] = $1.strip
      when /URL="(.+)"/
        # Only a double-quoted address is recognised.
        entry['URL'] = $1.strip
      end
    end
    entry
  end
end
# File lib/bio/db/embl/sptr.rb, line 960
#
# Returns the database cross-references.
#
# Without a key the result of embl_dr is returned untouched. With a key,
# each entry stored under that key is turned into a Hash built from its
# first four elements.
def dr(key = nil)
  return embl_dr unless key

  embl_dr[key].map do |fields|
    {
      'Accession'      => fields[0],
      'Version'        => fields[1],
      # The single-space key is part of the existing interface.
      ' '              => fields[2],
      'Molecular Type' => fields[3]
    }
  end
end
returns a Hash of information in the DT lines.
hash keys: ['created', 'sequence', 'annotation'] also Symbols acceptable (ASAP): [:created, :sequence, :annotation]
returns a String of information in the DT lines by a given key..
DT DD-MMM-YYYY (rel. NN, Created) DT DD-MMM-YYYY (rel. NN, Last sequence update) DT DD-MMM-YYYY (rel. NN, Last annotation update)
# File lib/bio/db/embl/sptr.rb, line 123
#
# Returns the information in the DT lines.
#
# key:: nil for the whole Hash, or one of 'created', 'sequence',
#       'annotation'. A Symbol is converted with to_s, so :created works
#       as well.
#
# The Hash is built from the first three DT lines, in that order, and is
# cached in @data['DT'].
def dt(key = nil)
  # The cache is keyed by String; to_s lets Symbol keys hit it too.
  return dt[key.to_s] if key
  return @data['DT'] if @data['DT']

  part = get('DT').split(/\n/)
  @data['DT'] = {
    'created'    => part[0].sub(/\w{2} /, '').strip,
    'sequence'   => part[1].sub(/\w{2} /, '').strip,
    'annotation' => part[2].sub(/\w{2} /, '').strip
  }
end
returns a ENTRY_NAME in the ID line.
# File lib/bio/db/embl/sptr.rb, line 79
#
# Returns the ENTRY_NAME field of the ID line.
def entry_id
  # id_line(key) itself resolves to id_line[key], so index the Hash directly.
  id_line['ENTRY_NAME']
end
returns contents in the feature table.
sp = Bio::SPTR.new(entry)
ft = sp.ft
ft.class #=> Hash
ft.keys.each do |feature_key|
ft[feature_key].each do |feature|
feature['From'] #=> '1'
feature['To'] #=> '21'
feature['Description'] #=> ''
feature['FTId'] #=> ''
feature['diff'] #=> []
feature['original'] #=> [feature_key, '1', '21', '', '']
end
end
{FEATURE_KEY => [{'From' => int, 'To' => int,
'Description' => aStr, 'FTId' => aStr,
'diff' => [original_residues, changed_residues],
'original' => aAry }],...}
returns an Array of the information about the feature_name in the feature table.
[{'From' => str, 'To' => str, 'Description' => str, 'FTId' => str},...]
Col Data item ----- ----------------- 1- 2 FT 6-13 Feature name 15-20 `FROM' endpoint 22-27 `TO' endpoint 35-75 Description (>=0 per key) ----- -----------------
Note: the ‘FROM’ and ‘TO’ endpoints may contain non-numerical characters such as ‘<’, ‘>’ or ‘?’ (e.g. ‘<1’, ‘?42’).
See also www.expasy.org/sprot/userman.html#FT_line
# File lib/bio/db/embl/sptr.rb, line 1025
#
# Returns the contents of the feature table (FT lines).
#
# feature_key:: nil for the whole table, or a feature name to get only the
#               Array stored under that name (nil when absent).
#
# The table is a Hash of
#   FEATURE_KEY => [{'From' => int, 'To' => int, 'Description' => str,
#                    'FTId' => str, 'diff' => [original, changed],
#                    'original' => Array}, ...]
# and is cached in @data['FT']. 'diff' is only filled for VARSPLIC,
# VARIANT, VAR_SEQ and CONFLICT features.
#
# Any error while parsing is re-raised as a RuntimeError that carries the
# entry id and the raw FT lines.
def ft(feature_key = nil)
  return ft[feature_key] if feature_key
  return @data['FT'] if @data['FT']

  table = []
  begin
    get('FT').split("\n").each do |line|
      # A feature starts in column 6 (the slices below read the name from
      # feature[5..12]), so "FT" must be followed by exactly three blanks.
      if line =~ /^FT {3}\w/
        feature = line.chomp.ljust(74)
        table << [feature[ 5..12].strip, # Feature Name
                  feature[14..19].strip, # From
                  feature[21..26].strip, # To
                  feature[34..74].strip] # Description
      else
        # Continuation line; non-destructive sub never yields nil.
        table.last << line.chomp.sub(/^FT +/, '')
      end
    end

    # Joining Description lines
    table = table.map { |feature|
      ftid = feature.pop if feature.last =~ /FTId=/
      if feature.size > 4
        feature = [feature[0], feature[1], feature[2],
                   feature[3, feature.size - 3].join(" ")]
      end
      feature << (ftid || '')
    }

    hash = {}
    table.each do |feature|
      name = feature[0]
      hash[name] ||= []
      record = {
        # Removing '<', '>' or '?' in FROM/TO endpoint.
        'From'        => feature[1].sub(/\D/, '').to_i,
        'To'          => feature[2].sub(/\D/, '').to_i,
        'Description' => feature[3],
        'FTId'        => feature[4].to_s.sub(/\/FTId=/, '').sub(/\.$/, ''),
        'diff'        => [],
        'original'    => feature
      }
      hash[name] << record

      case name
      when 'VARSPLIC', 'VARIANT', 'VAR_SEQ', 'CONFLICT'
        original_res = nil
        changed_res = nil
        case record['Description']
        when /(\w[\w ]*\w*) - ?> (\w[\w ]*\w*)/
          # Read both captures before gsub overwrites the match data.
          original_res = $1
          changed_res = $2
          original_res = original_res.gsub(/ /, '').strip
          changed_res = changed_res.gsub(/ /, '').strip
        when /Missing/i
          original_res = seq.subseq(record['From'], record['To'])
          changed_res = ''
        end
        # Both branches now feed the same variable, so a "Missing" feature
        # reports '' as the changed residues rather than nil.
        record['diff'] = [original_res, changed_res]
      end
    end
  rescue
    raise "Invalid FT Lines(#{$!}) in #{entry_id}:, \n'#{get('FT')}'\n"
  end

  @data['FT'] = hash
end
returns a Array of gene names in the GN line.
# File lib/bio/db/embl/sptr.rb, line 264
#
# Returns the gene names found in the GN line.
#
# When the parsed GN data is a list of gene-record Hashes, the :name of
# every record is returned; otherwise the first element of the parsed data
# is returned as it is.
def gene_names
  gn # fills @data['GN'] when it has not been parsed yet
  records = @data['GN']
  head = records.first
  return head unless head.class == Hash

  records.collect { |record| record[:name] }
end
returns gene names in the GN line.
where <gene record> is:
{ :name => '...',
:synonyms => [ 's1', 's2', ... ],
:loci => [ 'l1', 'l2', ... ],
:orfs => [ 'o1', 'o2', ... ]
}
Old format:
# File lib/bio/db/embl/sptr.rb, line 188
#
# Returns the parsed GN line, caching it in @data['GN'].
#
# A GN text containing "Name=" or "ORFNames=" is handed to
# gn_uniprot_parser; anything else goes to gn_old_parser.
def gn
  return @data['GN'] if @data['GN']

  text = fetch('GN')
  uniprot_style = (/Name=/ === text) || (/ORFNames=/ === text)
  @data['GN'] = uniprot_style ? gn_uniprot_parser : gn_old_parser
end
Bio::SPTR#hi #=> hash
# File lib/bio/db/embl/sptr.rb, line 529
#
# Returns the parsed HI line as an Array of Hashes, one per ". "-separated
# item:
#   {'Category' => str, 'Keywords' => [str, ...], 'Keyword' => str}
# 'Keyword' is the last "; "-separated element (trailing '.' removed) and
# 'Keywords' holds the elements in front of it. Cached in @data['HI'].
def hi
  return @data['HI'] if @data['HI']

  # The cache slot is created before parsing starts and filled item by item.
  @data['HI'] = []
  fetch('HI').split(/\. /).each do |item|
    category, keywords = item.split(': ')
    keywords = keywords.split('; ')
    keyword = keywords.pop
    keyword.sub!(/\.$/, '')
    @data['HI'] << {'Category' => category,
                    'Keywords' => keywords,
                    'Keyword'  => keyword}
  end
  @data['HI']
end
returns a Hash of the ID line.
returns the content (Int or String) of the ID line for a given key. Hash keys: [‘ENTRY_NAME’, ‘DATA_CLASS’, ‘MOLECULE_TYPE’, ‘SEQUENCE_LENGTH’]
ID P53_HUMAN STANDARD; PRT; 393 AA.
#"ID #{ENTRY_NAME} #{DATA_CLASS}; #{MOLECULE_TYPE}; #{SEQUENCE_LENGTH}."
obj.id_line #=> {"ENTRY_NAME"=>"P53_HUMAN", "DATA_CLASS"=>"STANDARD",
"SEQUENCE_LENGTH"=>393, "MOLECULE_TYPE"=>"PRT"}
obj.id_line('ENTRY_NAME') #=> "P53_HUMAN"
# File lib/bio/db/embl/sptr.rb, line 63
#
# Returns the ID line as a Hash, or a single field of it.
#
# key:: nil for the whole Hash, or one of 'ENTRY_NAME', 'DATA_CLASS',
#       'MOLECULE_TYPE', 'SEQUENCE_LENGTH'.
#
# The raw line in @orig['ID'] is split on blanks; ';' is removed from the
# data class and molecule type and the length is converted with to_i. The
# Hash is cached in @data['ID'].
def id_line(key = nil)
  return id_line[key] if key

  cached = @data['ID']
  return cached if cached

  # Field 0 is the "ID" tag itself.
  _tag, entry_name, data_class, molecule_type, length = @orig['ID'].split(/ +/)
  @data['ID'] = {
    'ENTRY_NAME'      => entry_name,
    'DATA_CLASS'      => data_class.sub(/;/, ''),
    'MOLECULE_TYPE'   => molecule_type.sub(/;/, ''),
    'SEQUENCE_LENGTH' => length.to_i
  }
end
returns a MOLECULE_TYPE in the ID line.
A short-cut for Bio::SPTR#id_line(‘MOLECULE_TYPE’).
# File lib/bio/db/embl/sptr.rb, line 89
#
# Returns the MOLECULE_TYPE field of the ID line.
def molecule
  # id_line(key) itself resolves to id_line[key], so index the Hash directly.
  id_line['MOLECULE_TYPE']
end
OH NCBI_TaxID=TaxID; HostName. br.expasy.org/sprot/userman.html#OH_line
# File lib/bio/db/embl/sptr.rb, line 358
#
# Returns the parsed OH line as an Array of
#   {'NCBI_TaxID' => str, 'HostName' => str or nil}
# Hashes, one per ". "-separated item. Cached in @data['OH'].
#
# Raises ArgumentError when an item carries no "NCBI_TaxID=<digits>;".
def oh
  return @data['OH'] if @data['OH']

  hosts = fetch('OH').split("\. ").map { |x|
    unless x =~ /NCBI_TaxID=(\d+);/
      raise ArgumentError, ["Error: Invalid OH line format (#{self.entry_id}):",
                            $!, "\n", get('OH'), "\n"].join
    end
    taxid = $1

    # Everything after "NCBI_TaxID=n; " is the host name, if there is any.
    host_name = x[/NCBI_TaxID=\d+; (.+)/, 1]
    host_name.sub!(/\.$/, '') if host_name

    {'NCBI_TaxID' => taxid, 'HostName' => host_name}
  }
  @data['OH'] = hosts
end
returns an Array of Hashes, or a String of the OS line when an index is given.
[{'name' => '(Human)', 'os' => 'Homo sapiens'},
{'name' => '(Rat)', 'os' => 'Rattus norvegicus'}]
{'name' => "(Human)", 'os' => 'Homo sapiens'}
OS Genus species (name). OS Genus species (name0) (name1). OS Genus species (name0) (name1). OS Genus species (name0), G s0 (name0), and G s (name0) (name1). OS Homo sapiens (Human), and Rattus norvegicus (Rat) OS Hippotis sp. Clark and Watts 825. OS unknown cyperaceous sp.
# File lib/bio/db/embl/sptr.rb, line 297
#
# Returns the organisms named in the OS line.
#
# num:: nil for the whole Array of {'name' => str or nil, 'os' => str}
#       Hashes, or an index to get "<os> <name>" for that element as one
#       String.
#
# The OS text is split on ", and" / ", "; 'name' is the parenthesised part
# of an item (nil when there is none). The Array is cached in @data['OS'].
# Raises RuntimeError for an item that does not look like an organism name.
def os(num = nil)
  unless @data['OS']
    organisms = fetch('OS').split(/, and|, /).map { |tmp|
      unless tmp =~ /(\w+ *[\w\d \:\'\+\-\.]+[\w\d\.])/
        raise "Error: OS Line. #{$!}\n#{fetch('OS')}\n"
      end
      org = $1
      {'name' => tmp[/(\(.+\))/, 1], 'os' => org}
    }
    @data['OS'] = organisms
  end

  return @data['OS'] unless num

  # EX. "Trifolium repens (white clover)"
  picked = @data['OS'][num]
  "#{picked['os']} #{picked['name']}"
end
returns a Hash of organism taxonomy cross-references.
{'NCBI_TaxID' => ['1234','2345','3456','4567'], ...}
OX NCBI_TaxID=1234; OX NCBI_TaxID=1234, 2345, 3456, 4567;
# File lib/bio/db/embl/sptr.rb, line 341
#
# Returns the taxonomy cross-references of the OX line as a Hash of
#   database name => Array of identifiers
# e.g. {'NCBI_TaxID' => ['1234', '2345']}. Cached in @data['OX'].
def ox
  return @data['OX'] if @data['OX']

  xrefs = Hash.new
  fetch('OX').sub(/\.$/, '').split(/;/).each do |field|
    db, refs = field.strip.split(/=/)
    xrefs[db] = refs.split(/, */)
  end
  @data['OX'] = xrefs
end
returns the proposed official name of the protein.
"DE #{OFFICIAL_NAME} (#{SYNONYM})"
"DE #{OFFICIAL_NAME} (#{SYNONYM}) [CONTAINS: #1; #2]."
OFFICIAL_NAME 1/entry
SYNONYM >=0
CONTAINS >=0
# File lib/bio/db/embl/sptr.rb, line 144
#
# Returns the proposed official name of the protein from the DE line.
#
# The name is the text in front of the first '(' within the part of the DE
# line that precedes the first '[' (the "contains" part). ' (Fragment)' is
# appended when that part mentions "fragment" in any letter case.
# Returns "" when fetch('DE') gives nothing.
def protein_name
  de_line = fetch('DE')
  return "" unless de_line

  before_contains = de_line[/^[^\[]*/] # everything preceding the first [
  name = before_contains[/^[^(]*/].strip
  name << ' (Fragment)' if before_contains =~ /fragment/i
  name
end
returns contents in the R lines.
where <reference information Hash> is:
{'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
R Lines
# File lib/bio/db/embl/sptr.rb, line 394
#
# Returns the contents of the R lines as an Array with one Hash per
# reference:
#   {'RN' => ..., 'RC' => ..., 'RP' => ..., 'RX' => ...,
#    'RA' => ..., 'RT' => ..., 'RL' => ..., 'RG' => ...}
#
# The text of each line type is accumulated (each piece followed by a
# blank) and then passed through the matching set_RN .. set_RG helper.
# Cached in @data['R']. Raises RuntimeError on a line that does not start
# with a two-letter R tag.
def ref
  return @data['R'] if @data['R']

  blocks = [get('R').split(/\nRN /)].flatten
  @data['R'] = blocks.map { |str|
    hash = {'RN' => '', 'RC' => '', 'RP' => '', 'RX' => '',
            'RA' => '', 'RT' => '', 'RL' => '', 'RG' => ''}
    # split removed the RN tag from every block but the first; restore it.
    str = 'RN ' + str unless /^RN / =~ str

    str.split("\n").each do |line|
      unless /^(R[NPXARLCTG]) (.+)/ =~ line
        raise "Invalid format in R lines, \n[#{line}]\n"
      end
      hash[$1] += $2 + ' '
    end

    # Same helper order as the hash literal above: set_RN, set_RC, ...
    %w(RN RC RP RX RA RT RL RG).each do |tag|
      hash[tag] = send("set_#{tag}", hash[tag])
    end
    hash
  }
end
returns Bio::Reference object from Bio::EMBLDB::Common#ref.
# File lib/bio/db/embl/sptr.rb, line 488
#
# Returns a References object built from the entries of #ref, caching it
# in @data['references'].
#
# For every reference: 'RA' is split on ", " into 'authors', 'RT' becomes
# 'title', 'RL' is broken into journal / volume / issue / pages / year when
# it matches "<journal> <vol> (<issue>), <from>-<to> (<year>)" (otherwise
# the whole value is the journal), and every "tag; value" item of 'RX' is
# stored under the lower-cased tag.
def references
  return @data['references'] if @data['references']

  ary = self.ref.map { |ent|
    hash = Hash.new('')
    ent.each do |key, value|
      if key == 'RA'
        hash['authors'] = value.split(/, /)
      elsif key == 'RT'
        hash['title'] = value
      elsif key == 'RL'
        if value =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
          hash['journal'], hash['volume'], hash['issue'],
            hash['pages'], hash['year'] = $1, $2, $3, $4, $5
        else
          hash['journal'] = value
        end
      elsif key == 'RX' # PUBMED, MEDLINE
        value.split('.').each do |item|
          tag, xref = item.split(/; /).map { |i| i.strip }
          hash[tag.downcase] = xref
        end
      end
    end
    Reference.new(hash)
  }
  @data['references'] = References.new(ary)
end
returns a Bio::Sequence::AA of the amino acid sequence.
blank Line; sequence data (>=1)
# File lib/bio/db/embl/sptr.rb, line 1135
#
# Returns the amino acid sequence as a Sequence::AA.
#
# The sequence block (the lines stored under the empty tag) is stripped of
# blanks and digits before the object is built; the result is cached in
# @data[''].
def seq
  @data[''] ||= Sequence::AA.new(fetch('').gsub(/ |\d+/, ''))
end
returns a SEQUENCE_LENGTH in the ID line.
A short-cut for Bio::SPTR#id_line(‘SEQUENCE_LENGTH’).
# File lib/bio/db/embl/sptr.rb, line 98
#
# Returns the SEQUENCE_LENGTH field of the ID line.
def sequence_length
  # id_line(key) itself resolves to id_line[key], so index the Hash directly.
  id_line['SEQUENCE_LENGTH']
end
returns a Hash of the contents of the SQ lines.
returns a value of a key given in the SQ lines.
'CRC64']
SQ SEQUENCE 233 AA; 25630 MW; 146A1B48A1475C86 CRC64; SQ SEQUENCE \d+ AA; \d+ MW; [0-9A-Z]+ CRC64;
MW, Dalton unit. CRC64 (64-bit Cyclic Redundancy Check, ISO 3309).
# File lib/bio/db/embl/sptr.rb, line 1107
#
# Returns the information in the SQ line.
#
# key:: nil for the whole Hash {'aalen' => int, 'MW' => int,
#       'CRC64' => str}. A key matching /mw/, /molecular/ or /weight/
#       selects 'MW'; one matching /len/, /length/ or /AA/ selects 'aalen';
#       any other key is looked up as it is.
#
# The Hash is cached in @data['SQ']. Raises RuntimeError when the SQ line
# does not have the "<n> AA; <n> MW; <crc> CRC64;" shape.
def sq(key = nil)
  unless @data['SQ']
    unless fetch('SQ') =~ /(\d+) AA\; (\d+) MW; (.+) CRC64;/
      raise "Invalid SQ Line: \n'#{fetch('SQ')}'"
    end
    @data['SQ'] = {'aalen' => $1.to_i, 'MW' => $2.to_i, 'CRC64' => $3}
  end

  return @data['SQ'] unless key

  field = case key
          when /mw/, /molecular/, /weight/ then 'MW'
          when /len/, /length/, /AA/ then 'aalen'
          else key
          end
  @data['SQ'][field]
end
returns an array of synonyms (unofficial names).
synonyms are each placed in () following the official name on the DE line.
# File lib/bio/db/embl/sptr.rb, line 158
#
# Returns an Array of synonyms (unofficial names) from the DE line.
#
# Synonyms are the parenthesised items of the DE line once the bracketed
# "contains" part has been removed; items mentioning "fragment" (any letter
# case) are skipped. Returns an empty Array when fetch('DE') gives nothing.
def synonyms
  de_line = fetch('DE')
  return Array.new unless de_line

  line = de_line.sub(/\[.*\]/, '') # ignore stuff between [ and ]
  in_parens = line.scan(/\([^)]+/)
  in_parens.reject { |item| item =~ /fragment/i }.
            map { |item| item[1..-1].strip } # drop the leading (
end