|
32 | 32 | import org.biojava3.alignment.template.AlignedSequence.Step; |
33 | 33 | import org.biojava3.alignment.template.Profile; |
34 | 34 | import org.biojava3.alignment.template.ProfileView; |
| 35 | +import org.biojava3.core.sequence.Strand; |
| 36 | +import org.biojava3.core.sequence.compound.AmbiguityDNACompoundSet; |
| 37 | +import org.biojava3.core.sequence.compound.AmbiguityRNACompoundSet; |
| 38 | +import org.biojava3.core.sequence.compound.DNACompoundSet; |
| 39 | +import org.biojava3.core.sequence.compound.RNACompoundSet; |
35 | 40 | import org.biojava3.core.sequence.location.template.Location; |
36 | 41 | import org.biojava3.core.sequence.template.Compound; |
37 | 42 | import org.biojava3.core.sequence.template.CompoundSet; |
@@ -147,6 +152,8 @@ protected SimpleProfile(Profile<S, C> query, Profile<S, C> target, List<Step> sx |
147 | 152 | length = sx.size(); |
148 | 153 | } |
149 | 154 |
|
| 155 | + // methods for Profile |
| 156 | + |
150 | 157 | @Override |
151 | 158 | public AlignedSequence<S, C> getAlignedSequence(int listIndex) { |
152 | 159 | return list.get(listIndex - 1); |
@@ -328,23 +335,164 @@ public boolean isCircular() { |
328 | 335 |
|
329 | 336 | @Override |
330 | 337 | public String toString(int width) { |
331 | | - // TODO String toString(int) |
332 | | - return null; |
| 338 | + return toString(width, null, getIDFormat(), true, true, true, true, true); |
333 | 339 | } |
334 | 340 |
|
335 | 341 | @Override |
336 | | - public String toString() { |
337 | | - // TODO handle circular alignments |
338 | | - StringBuilder s = new StringBuilder(); |
339 | | - for (AlignedSequence<S, C> as : list) { |
340 | | - s.append(String.format("%s%n", as.toString())); |
| 342 | + public String toString(StringFormat format) { |
| 343 | + switch (format) { |
| 344 | + case ALN: |
| 345 | + case CLUSTALW: |
| 346 | + default: |
| 347 | + return toString(60, String.format("CLUSTAL W MSA from BioJava%n%n"), getIDFormat() + " ", false, true, |
| 348 | + true, false, true); |
| 349 | + case FASTA: |
| 350 | + return toString(60, null, ">%s%n", false, false, false, false, false); |
| 351 | + case GCG: |
| 352 | + case MSF: |
| 353 | + String idFormat = getIDFormat(); |
| 354 | + StringBuilder header = new StringBuilder(); |
| 355 | + header.append(String.format("MSA from BioJava%n%n MSF: %d Type: %s Check: %d ..%n%n", getLength(), |
| 356 | + getGCGType(), getGCGChecksum())); |
| 357 | + for (AlignedSequence<S, C> as : list) { |
| 358 | + header.append(String.format(" Name: " + idFormat + " Len: %d Check: %4d Weight: %.1f%n", |
| 359 | + as.getAccession(), getLength(), getGCGChecksum(as), 1.0f)); // TODO show weights in MSF header |
| 360 | + } |
| 361 | + header.append(String.format("%n//%n%n")); |
| 362 | + // TODO? convert gap characters to '.' |
| 363 | + return toString(50, header.toString(), idFormat, false, false, true, false, false); |
341 | 364 | } |
342 | | - return s.toString(); |
343 | 365 | } |
344 | 366 |
|
| 367 | + // method from Object |
| 368 | + |
| 369 | + @Override |
| 370 | + public String toString() { |
| 371 | + return toString(getLength(), null, null, false, false, false, false, false); |
| 372 | + } |
| 373 | + |
| 374 | + // method for Iterable |
| 375 | + |
345 | 376 | @Override |
346 | 377 | public Iterator<AlignedSequence<S, C>> iterator() { |
347 | 378 | return list.iterator(); |
348 | 379 | } |
349 | 380 |
|
| 381 | + // helper methods |
| 382 | + |
| 383 | + // calculates GCG checksum for entire Profile |
| 384 | + private int getGCGChecksum() { |
| 385 | + int check = 0; |
| 386 | + for (AlignedSequence<S, C> as : list) { |
| 387 | + check += getGCGChecksum(as); |
| 388 | + } |
| 389 | + return check % 10000; |
| 390 | + } |
| 391 | + |
| 392 | + // calculates GCG checksum for a given Sequence |
| 393 | + private int getGCGChecksum(AlignedSequence<S, C> sequence) { |
| 394 | + String s = sequence.toString().toUpperCase(); |
| 395 | + int count = 0, check = 0; |
| 396 | + for (int i = 0; i < s.length(); i++) { |
| 397 | + count++; |
| 398 | + check += count * s.charAt(i); |
| 399 | + if (count == 57) { |
| 400 | + count = 0; |
| 401 | + } |
| 402 | + } |
| 403 | + return check % 10000; |
| 404 | + } |
| 405 | + |
| 406 | + // determines GCG type |
| 407 | + private String getGCGType() { |
| 408 | + CompoundSet<C> cs = getCompoundSet(); |
| 409 | + return (cs == DNACompoundSet.getDNACompoundSet() || cs == AmbiguityDNACompoundSet.getDNACompoundSet()) ? "D" : |
| 410 | + (cs == RNACompoundSet.getRNACompoundSet() || cs == AmbiguityRNACompoundSet.getRNACompoundSet()) ? "R" : |
| 411 | + "P"; |
| 412 | + } |
| 413 | + |
| 414 | + // creates format String for accession IDs |
| 415 | + private String getIDFormat() { |
| 416 | + int length = 0; |
| 417 | + for (AlignedSequence<S, C> as : list) { |
| 418 | + length = Math.max(length, (as.getAccession() == null) ? 0 : as.getAccession().toString().length()); |
| 419 | + } |
| 420 | + return (length == 0) ? null : "%-" + (length + 1) + "s"; |
| 421 | + } |
| 422 | + |
| 423 | + // creates formatted String |
| 424 | + private String toString(int width, String header, String idFormat, boolean seqIndexPre, boolean seqIndexPost, |
| 425 | + boolean interlaced, boolean aligIndices, boolean aligConservation) { |
| 426 | + // TODO handle circular alignments |
| 427 | + StringBuilder s = (header == null) ? new StringBuilder() : new StringBuilder(header); |
| 428 | + width = Math.max(1, width); |
| 429 | + int seqIndexPad = (int) (Math.floor(Math.log10(getLength())) + 2); |
| 430 | + String seqIndexFormatPre = "%" + seqIndexPad + "d ", seqIndexFormatPost = "%" + seqIndexPad + "d"; |
| 431 | + if (interlaced) { |
| 432 | + String aligIndFormat = "%-" + Math.max(1, width / 2) + "d %" + Math.max(1, width - (width / 2) - 1) + |
| 433 | + "d%n"; |
| 434 | + for (int i = 0; i < getLength(); i += width) { |
| 435 | + int start = i + 1, end = Math.min(getLength(), i + width); |
| 436 | + if (i > 0) { |
| 437 | + s.append(String.format("%n")); |
| 438 | + } |
| 439 | + if (aligIndices) { |
| 440 | + if (end < i + width) { |
| 441 | + int line = end - start + 1; |
| 442 | + aligIndFormat = "%-" + Math.max(1, line / 2) + "d %" + Math.max(1, line - (line / 2) - 1) + |
| 443 | + "d%n"; |
| 444 | + } |
| 445 | + if (idFormat != null) { |
| 446 | + s.append(String.format(idFormat, "")); |
| 447 | + } |
| 448 | + if (seqIndexPre) { |
| 449 | + s.append(String.format("%" + (seqIndexPad + 1) + "s", "")); |
| 450 | + } |
| 451 | + s.append(String.format(aligIndFormat, start, end)); |
| 452 | + } |
| 453 | + for (AlignedSequence<S, C> as : list) { |
| 454 | + if (idFormat != null) { |
| 455 | + s.append(String.format(idFormat, as.getAccession())); |
| 456 | + } |
| 457 | + if (seqIndexPre) { |
| 458 | + s.append(String.format(seqIndexFormatPre, as.getSequenceIndexAt(start))); |
| 459 | + } |
| 460 | + s.append(as.getSequenceAsString(start, end, Strand.UNDEFINED)); |
| 461 | + if (seqIndexPost) { |
| 462 | + s.append(String.format(seqIndexFormatPost, as.getSequenceIndexAt(end))); |
| 463 | + } |
| 464 | + s.append(String.format("%n")); |
| 465 | + } |
| 466 | + if (aligConservation) { |
| 467 | + if (idFormat != null) { |
| 468 | + s.append(String.format(idFormat, "")); |
| 469 | + } |
| 470 | + if (seqIndexPre) { |
| 471 | + s.append(String.format("%" + (seqIndexPad + 1) + "s", "")); |
| 472 | + } |
| 473 | + // TODO conservation annotation |
| 474 | + s.append(String.format("%n")); |
| 475 | + } |
| 476 | + } |
| 477 | + } else { |
| 478 | + for (AlignedSequence<S, C> as : list) { |
| 479 | + if (idFormat != null) { |
| 480 | + s.append(String.format(idFormat, as.getAccession())); |
| 481 | + } |
| 482 | + for (int i = 0; i < getLength(); i += width) { |
| 483 | + int start = i + 1, end = Math.min(getLength(), i + width); |
| 484 | + if (seqIndexPre) { |
| 485 | + s.append(String.format(seqIndexFormatPre, as.getSequenceIndexAt(start))); |
| 486 | + } |
| 487 | + s.append(as.getSequenceAsString(start, end, Strand.UNDEFINED)); |
| 488 | + if (seqIndexPost) { |
| 489 | + s.append(String.format(seqIndexFormatPost, as.getSequenceIndexAt(end))); |
| 490 | + } |
| 491 | + s.append(String.format("%n")); |
| 492 | + } |
| 493 | + } |
| 494 | + } |
| 495 | + return s.toString(); |
| 496 | + } |
| 497 | + |
350 | 498 | } |
0 commit comments