<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE article>
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ali="http://www.niso.org/schemas/ali/1.0/" article-type="research-article" dtd-version="1.2" xml:lang="en"><front><journal-meta><journal-id journal-id-type="publisher-id">Ecological genetics</journal-id><journal-title-group><journal-title xml:lang="en">Ecological genetics</journal-title><trans-title-group xml:lang="ru"><trans-title>Экологическая генетика</trans-title></trans-title-group></journal-title-group><issn publication-format="print">1811-0932</issn><issn publication-format="electronic">2411-9202</issn><publisher><publisher-name xml:lang="en">Eco-Vector</publisher-name></publisher></journal-meta><article-meta><article-id pub-id-type="publisher-id">642484</article-id><article-id pub-id-type="doi">10.17816/ecogen642484</article-id><article-id pub-id-type="edn">RMHFWD</article-id><article-categories><subj-group subj-group-type="toc-heading" xml:lang="en"><subject>Ecosystems metagenomics</subject></subj-group><subj-group subj-group-type="toc-heading" xml:lang="ru"><subject>Метагеномика экосистем</subject></subj-group><subj-group subj-group-type="article-type"><subject>Research Article</subject></subj-group></article-categories><title-group><article-title xml:lang="en">RNA-seq contamination as a metatranscriptomic data for screening of plant pests and symbionts</article-title><trans-title-group xml:lang="ru"><trans-title>Контаминация при RNA-Seq как метатранскриптомные данные для скрининга вредителей и симбионтов растений</trans-title></trans-title-group></title-group><contrib-group><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-1624-6163</contrib-id><contrib-id contrib-id-type="spin">2730-5890</contrib-id><name-alternatives><name xml:lang="en"><surname>Zykin</surname><given-names>Pavel A.</given-names></name><name 
xml:lang="ru"><surname>Зыкин</surname><given-names>Павел Александрович</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Cand. Sci. (Biology)</p></bio><bio xml:lang="ru"><p>канд. биол. наук</p></bio><email>pavel.zykin@spbu.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-9326-3170</contrib-id><contrib-id contrib-id-type="spin">7269-8240</contrib-id><name-alternatives><name xml:lang="en"><surname>Andreeva</surname><given-names>Elena A.</given-names></name><name xml:lang="ru"><surname>Андреева</surname><given-names>Елена Александровна</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Cand. Sci. (Biology)</p></bio><bio xml:lang="ru"><p>канд. биол. наук</p></bio><email>e.a.andreeva@spbu.ru</email><xref ref-type="aff" rid="aff1"/><xref ref-type="aff" rid="aff2"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0002-7353-1107</contrib-id><contrib-id contrib-id-type="spin">1687-5757</contrib-id><name-alternatives><name xml:lang="en"><surname>Tsvetkova</surname><given-names>Natalia V.</given-names></name><name xml:lang="ru"><surname>Цветкова</surname><given-names>Наталья Владимировна</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Cand. Sci. (Biology)</p></bio><bio xml:lang="ru"><p>канд. биол. 
наук</p></bio><email>n.tswetkowa@spbu.ru</email><xref ref-type="aff" rid="aff1"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0009-0003-8092-9978</contrib-id><contrib-id contrib-id-type="spin">3791-9700</contrib-id><name-alternatives><name xml:lang="en"><surname>Bulanov</surname><given-names>Andrey N.</given-names></name><name xml:lang="ru"><surname>Буланов</surname><given-names>Андрей Николаевич</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><email>an.bulanov20002014@gmail.com</email><xref ref-type="aff" rid="aff2"/></contrib><contrib contrib-type="author"><contrib-id contrib-id-type="orcid">https://orcid.org/0000-0003-3423-8511</contrib-id><name-alternatives><name xml:lang="en"><surname>Voylokov</surname><given-names>Anatoly V.</given-names></name><name xml:lang="ru"><surname>Войлоков</surname><given-names>Анатолий Васильевич</given-names></name></name-alternatives><address><country country="RU">Russian Federation</country></address><bio xml:lang="en"><p>Dr. Sci. (Biology)</p></bio><bio xml:lang="ru"><p>д-р биол. 
наук</p></bio><email>av_voylokov@mail.ru</email><xref ref-type="aff" rid="aff2"/></contrib></contrib-group><aff-alternatives id="aff1"><aff><institution xml:lang="en">Saint Petersburg State University</institution></aff><aff><institution xml:lang="ru">Санкт-Петербургский государственный университет</institution></aff></aff-alternatives><aff-alternatives id="aff2"><aff><institution xml:lang="en">Institute of General Genetics</institution></aff><aff><institution xml:lang="ru">Институт общей генетики Российской академии наук</institution></aff></aff-alternatives><pub-date date-type="preprint" iso-8601-date="2025-09-18" publication-format="electronic"><day>18</day><month>09</month><year>2025</year></pub-date><pub-date date-type="pub" iso-8601-date="2025-12-15" publication-format="electronic"><day>15</day><month>12</month><year>2025</year></pub-date><volume>23</volume><issue>3</issue><issue-title xml:lang="en"/><issue-title xml:lang="ru"/><fpage>235</fpage><lpage>247</lpage><history><date date-type="received" iso-8601-date="2024-12-02"><day>02</day><month>12</month><year>2024</year></date><date date-type="accepted" iso-8601-date="2025-09-18"><day>18</day><month>09</month><year>2025</year></date></history><permissions><copyright-statement xml:lang="en">Copyright © 2025, Eco-Vector</copyright-statement><copyright-statement xml:lang="ru">Copyright © 2025, Эко-Вектор</copyright-statement><copyright-year>2025</copyright-year><copyright-holder xml:lang="en">Eco-Vector</copyright-holder><copyright-holder xml:lang="ru">Эко-Вектор</copyright-holder><license><ali:license_ref xmlns:ali="http://www.niso.org/schemas/ali/1.0/">https://eco-vector.com/for_authors.php#07</ali:license_ref></license></permissions><self-uri xlink:href="https://journals.eco-vector.com/ecolgenet/article/view/642484">https://journals.eco-vector.com/ecolgenet/article/view/642484</self-uri><abstract xml:lang="en"><p><bold>Background:</bold> Transcriptome sequencing data can contain up to 30% contaminating 
reads. These may originate from laboratory contamination or biologically relevant sources, amenable to metatranscriptomics analysis.</p> <p><bold>Aim:</bold> To evaluate the utility of contaminating reads for large-scale screening of plant pests and symbionts.</p> <p><bold>Methods:</bold> We analyzed the data of RNA-seq experiments of rye (Secale cereale L.) including five in-house accessions and 50 public datasets from the NCBI SRA archive. Reads with good mapping to the rye genome were filtered out, retaining putative contaminants for downstream analysis.</p> <p><bold>Results:</bold> After removing laboratory contaminants, we compared aphids, symbiotic fungi, bacteria and viruses across accessions. Symbiome-derived reads were reproducible in biological replicates and varied by location, condition, and plant species, enabling post-hoc metatranscriptomic analysis.</p> <p><bold>Conclusions:</bold> Contaminating reads correlated with field-observed species or expected symbionts. Distribution patterns across accessions support repurposing existing and future sequencing data to screen for plant pests, monitor symbiotic organisms, and plan eradication strategies amid global climate change.</p></abstract><trans-abstract xml:lang="ru"><p><bold>Обоснование.</bold> Данные секвенирования транскриптома могут содержать до 30% контаминирующих прочтений. Их происхождение может быть связано либо с лабораторной контаминацией, либо с биологически значимыми сигналами, которые можно исследовать методами метатранскриптомики.</p> <p><bold>Цель исследования.</bold> Оценить возможность использования загрязняющих прочтений для масштабного скрининга вредителей растений и симбионтов.</p> <p><bold>Методы.</bold> Проанализированы данные РНК-секвенирования ржи (Secale cereale L.), включая собственные эксперименты (5 образцов) и общедоступные наборы данных (50 образцов) из архива биоинформационной базы NCBI SRA (Sequence Read Archive from the National Center for Biotechnology Information). 
Прочтения, значимо картировавшиеся на геном ржи, были отфильтрованы; оставшиеся потенциальные загрязняющие прочтения подвергнуты анализу.</p> <p><bold>Результаты.</bold> После исключения очевидных лабораторных контаминантов проведено сравнение встречаемости тлей, симбиотических грибов, бактерий и вирусов между образцами. Чтения, происходящие из симбиома, воспроизводимы в биологических повторностях и варьируют в зависимости от локации/условий/вида растения, что позволяет использовать их для последующего метатранскриптомного анализа.</p> <p><bold>Заключение.</bold> Результаты демонстрируют корреляцию между видами, обнаруженными в полевых условиях или предполагаемыми к присутствию, и загрязняющими прочтениями. Распределение различных видов в образцах из одних и разных локаций подтверждает целесообразность использования существующих и будущих архивных данных секвенирования для скрининга распространения вредителей растений, мониторинга новых симбиотических организмов и планирования мер борьбы с ними в условиях глобального изменения климата.</p></trans-abstract><kwd-group xml:lang="en"><kwd>rye</kwd><kwd>plant disease monitoring</kwd><kwd>databases</kwd><kwd>metatranscriptomics</kwd></kwd-group><kwd-group xml:lang="ru"><kwd>рожь</kwd><kwd>мониторинг болезней растений</kwd><kwd>базы данных</kwd><kwd>метатранскриптомика</kwd></kwd-group><funding-group><award-group><funding-source><institution-wrap><institution xml:lang="en">Government of the Russian Federation</institution></institution-wrap><institution-wrap><institution xml:lang="ru">Правительство Российской Федерации</institution></institution-wrap></funding-source><award-id>21877-П8-ДЧ</award-id></award-group></funding-group></article-meta></front><body></body><back><ref-list><ref id="B1"><label>1.</label><mixed-citation>Sangiovanni M, Granata I, Thind AS, Guarracino MR. From trash to treasure: detecting unexpected contamination in unmapped NGS data. BMC Bioinformatics. 2019;20:168. 
doi: 10.1186/s12859-019-2684-x</mixed-citation></ref><ref id="B2"><label>2.</label><mixed-citation>Simion P, Belkhir K, François C, et al. A software tool ‘CroCo’ detects pervasive cross-species contamination in next generation sequencing data. BMC Biology. 2018;16:28. doi: 10.1186/s12915-018-0486-7</mixed-citation></ref><ref id="B3"><label>3.</label><mixed-citation>Chen S, Zhou Y, Chen Y, Gu J. fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018;34(17):i884–i890. doi: 10.1093/bioinformatics/bty560</mixed-citation></ref><ref id="B4"><label>4.</label><mixed-citation>Rabanus-Wallace MT, Hackauf B, Mascher M, et al. Chromosome-scale genome assembly provides insights into rye biology, evolution and agronomic potential. Nat Genet. 2021;53:564–573. doi: 10.1038/s41588-021-00807-0</mixed-citation></ref><ref id="B5"><label>5.</label><mixed-citation>Bushnell B. BBMap: A fast, accurate, splice-aware aligner. USA: Department of Energy. Office of Science; 2014.</mixed-citation></ref><ref id="B6"><label>6.</label><mixed-citation>Bushmanova E, Antipov D, Lapidus A, Prjibelski AD. rnaSPAdes: a de novo transcriptome assembler and its application to RNA-Seq data. Gigascience. 2019;8(9):giz100. doi: 10.1093/gigascience/giz100</mixed-citation></ref><ref id="B7"><label>7.</label><mixed-citation>NCBI Resource Coordinators. Database resources of the National Center for Biotechnology Information. Nucleic Acids Res. 2018;46(D1):D8–D13. doi: 10.1093/nar/gkx1095</mixed-citation></ref><ref id="B8"><label>8.</label><mixed-citation>Wingett SW, Andrews S. FastQ Screen: A tool for multi-genome mapping and quality control. F1000Res. 2018;7:1338. doi: 10.12688/f1000research.15931.2</mixed-citation></ref><ref id="B9"><label>9.</label><mixed-citation>Lafond-Lapalme J, Duceppe M-O, Wang S, et al. A new method for decontamination of de novo transcriptomes using a hierarchical clustering algorithm. Bioinformatics. 2017;33(9):1293–1300. 
doi: 10.1093/bioinformatics/btw793</mixed-citation></ref><ref id="B10"><label>10.</label><mixed-citation>Chen Y, Singh A, Kaithakottil GG, et al. An aphid RNA transcript migrates systemically within plants and is a virulence factor. PNAS. 2020;117(23):12763–12771. doi: 10.1073/pnas.1918410117</mixed-citation></ref><ref id="B11"><label>11.</label><mixed-citation>Salter SJ, Cox MJ, Turek EM, et al. Reagent and laboratory contamination can critically impact sequence-based microbiome analyses. BMC Biol. 2014;12:87. doi: 10.1186/s12915-014-0087-z</mixed-citation></ref><ref id="B12"><label>12.</label><mixed-citation>Berim MN. The most harmful species of aphids in the north-west of Russia. Plant protection and quarantine. 2014;(9):29–30. EDN: SJYWIX</mixed-citation></ref><ref id="B13"><label>13.</label><mixed-citation>van Kleeff PJM, Galland M, Schuurink RC, Bleeker PM. Small RNAs from Bemisia tabaci are transferred to Solanum lycopersicum phloem during feeding. Front Plant Sci. 2016;7:1759. doi: 10.3389/fpls.2016.01759</mixed-citation></ref><ref id="B14"><label>14.</label><mixed-citation>Su Y-L, Li J-M, Li M, et al. Transcriptomic analysis of the salivary glands of an invasive whitefly. PLoS One. 2012;7(6):e39303. doi: 10.1371/journal.pone.0039303</mixed-citation></ref><ref id="B15"><label>15.</label><mixed-citation>Ban L, Didon A, Jonsson LMV, et al. An improved detection method for the Rhopalosiphum padi virus (RhPV) allows monitoring of its presence in aphids and movement within plants. J Virol Methods. 2007;142(1–2):136–142. doi: 10.1016/j.jviromet.2007.01.014</mixed-citation></ref><ref id="B16"><label>16.</label><mixed-citation>Zhao S, Ye Z, Stanton R. Misuse of RPKM or TPM normalization when comparing across samples and sequencing protocols. RNA. 2020;26(8):903–909. doi: 10.1261/rna.074922.120</mixed-citation></ref><ref id="B17"><label>17.</label><mixed-citation>Zhao Y, Li M-C, Konaté MM, et al. TPM, FPKM, or normalized counts? 
A comparative study of quantification measures for the analysis of RNA-seq data from the NCI patient-derived models repository. J Transl Med. 2021;19:296. doi: 10.1186/s12967-021-02936-w</mixed-citation></ref><ref id="B18"><label>18.</label><mixed-citation>Mukherjee A, Reddy MS. Metatranscriptomics: an approach for retrieving novel eukaryotic genes from polluted and related environments. 3 Biotech. 2020;10:71. doi: 10.1007/s13205-020-2057-1</mixed-citation></ref><ref id="B19"><label>19.</label><mixed-citation>Shakya M, Lo C-C, Chain PSG. Advances and challenges in metatranscriptomic analysis. Front Genet. 2019;10:904. doi: 10.3389/fgene.2019.00904</mixed-citation></ref><ref id="B20"><label>20.</label><mixed-citation>Barton HA, Taylor NM, Lubbers BR, Pemberton AC. DNA extraction from low-biomass carbonate rock: An improved method with reduced contamination and the low-biomass contaminant database. J Microbiol Methods. 2006;66(1):21–31. doi: 10.1016/j.mimet.2005.10.005</mixed-citation></ref><ref id="B21"><label>21.</label><mixed-citation>Wally N, Schneider M, Thannesberger J, et al. Plasmid DNA contaminant in molecular reagents. Sci Rep. 2019;9:1652. doi: 10.1038/s41598-019-38733-1</mixed-citation></ref><ref id="B22"><label>22.</label><mixed-citation>Weyrich LS, Farrer AG, Eisenhofer R, et al. Laboratory contamination over time during low-biomass sample analysis. Mol Ecol Resour. 2019;19(4):982–996. doi: 10.1111/1755-0998.13011</mixed-citation></ref><ref id="B23"><label>23.</label><mixed-citation>Christensen GJM, Brüggemann H. Bacterial skin commensals and their role as host guardians. Benef Microbes. 2014;5(2):201–215. doi: 10.3920/BM2012.0062</mixed-citation></ref></ref-list></back></article>
