// assets/Text.js

const Asset = require('./Asset');
// Optional character-set conversion backends. Each variable stays undefined
// when its module cannot be required; the Text class checks them before use.
let iconv;
let iconvLite;

// `iconv` is tried first; `iconv-lite` is only attempted when `iconv` fails to
// load. A failure to load both is deliberately ignored here: utf-8 and ascii
// are handled with Buffer alone, and any other encoding raises a descriptive
// error at the point where a conversion is actually needed.
try {
  iconv = require('iconv');
} catch (e) {
  try {
    iconvLite = require('iconv-lite');
  } catch (e2) {}
}

/**
 * Adds text encoding and decoding support to {@link Asset}. Serves as a
 * superclass for {@link Html}, {@link Xml}, {@link Css}, {@link JavaScript},
 * {@link Json}, and {@link CacheManifest}.
 *
 * In addition to the [config]{@link AssetConfig} options already supported by the Asset base
 * class, the options `text` and `encoding` are also available.
 *
 * @example
 *     var textAsset = new Text({
 *         // "æøå" in iso-8859-1:
 *         rawSrc: Buffer.from([0xe6, 0xf8, 0xe5]),
 *         encoding: "iso-8859-1"
 *     });
 *     textAsset.text; // "æøå" (decoded JavaScript string)
 *     textAsset.encoding = 'utf-8';
 *     textAsset.rawSrc; // <Buffer c3 a6 c3 b8 c3 a5>
 *
 * @class Text
 * @extends Asset
 * @param {AssetConfig} config The Text asset's configuration
 *
 * @param {String} [config.text] The decoded source of the asset. Can be used
 * instead of `rawSrc` and `rawSrcProxy`.
 *
 * @param {String} [config.encoding="utf-8"] Used to decode and reencode the {@link Asset#rawSrc}.
 * Can be any encoding supported by the `iconv` module. Can be changed later
 * using {@link Text#encoding}. See the docs for {@link Text#defaultEncoding}.
 * If the asset is loaded via http, the encoding will be read from the `Content-Type`,
 * and likewise for `data:` urls.
 *
 * @param {AssetGraph} assetGraph Mandatory AssetGraph instance reference
 */
class Text extends Asset {
  /**
   * The encoding (charset) used when re-encoding the raw source of the
   * asset. When no encoding has been set yet, the value of
   * {@link Text#defaultEncoding} is adopted and remembered.
   *
   * Assigning a new value decodes the current raw source with the old
   * encoding first, then discards the raw bytes so they get regenerated
   * with the new encoding on demand. To affect the initial decoding of
   * the `rawSrc` option, provide the `encoding` option to the constructor.
   *
   * @type {String}
   */
  get encoding() {
    if (this._encoding) {
      return this._encoding;
    }
    this._encoding = this.defaultEncoding;
    return this._encoding;
  }

  set encoding(encoding) {
    if (encoding === this.encoding) {
      return;
    }
    // Reading `text` forces the raw source to be decoded while the original
    // encoding is still known; the value itself is intentionally unused.
    /* eslint-disable */
    const decodedWithPreviousEncoding = this.text;
    /* eslint-enable */
    this._rawSrc = undefined;
    this._encoding = encoding;
    this.markDirty();
  }

  /**
   * The raw (encoded) source of the asset. When no raw source is cached, it
   * is produced by encoding `text` with the current `encoding`: utf-8 and
   * ascii are handled with `Buffer`, anything else goes through `iconv` or
   * `iconv-lite`.
   *
   * When a conversion fails and the asset belongs to an AssetGraph, the error
   * is reported via `assetGraph.warn` instead of being thrown; the getter may
   * then yield `undefined` unless the transliterating `iconv` fallback
   * (used for `EILSEQ` errors) produced a result.
   *
   * Assigning a new raw source unloads the asset, drops the cached text and
   * parse tree, repopulates the asset when it is part of an AssetGraph, and
   * marks it dirty.
   *
   * @type {Buffer}
   * @throws {Error} If neither a raw source nor text/parse tree is present,
   * if no conversion module is available for the encoding, or if a
   * conversion fails while the asset has no AssetGraph.
   */
  get rawSrc() {
    if (this._rawSrc) {
      return this._rawSrc;
    }

    const hasSerializableState =
      typeof this._text === 'string' || this._parseTree;
    if (!hasSerializableState) {
      const nothingToEncodeError = new Error(
        'Text.rawSrc getter: No _rawSrc or _text property found, asset not loaded?'
      );
      nothingToEncodeError.asset = this;

      throw nothingToEncodeError;
    }

    if (isUtf8Encoding(this.encoding)) {
      this._updateRawSrcAndLastKnownByteLength(Buffer.from(this.text, 'utf-8'));
    } else if (isAsciiEncoding(this.encoding)) {
      this._updateRawSrcAndLastKnownByteLength(Buffer.from(this.text, 'ascii'));
    } else if (iconv) {
      try {
        const encoder = new iconv.Iconv('utf-8', this.encoding);
        this._updateRawSrcAndLastKnownByteLength(encoder.convert(this.text));
      } catch (err) {
        err.message = `iconv: Converting ${this.url} from UTF-8 to ${this.encoding} failed:\n${err.message}`;
        if (!this.assetGraph) {
          throw err;
        }
        if (err.code === 'EILSEQ') {
          // The text contains characters the target charset cannot represent:
          // retry in a lossy mode so a raw source is still produced.
          err.message +=
            '\nTransliterating and ignoring further failures. Data corruption may occur.';
          const lossyEncoder = new iconv.Iconv(
            'utf-8',
            `${this.encoding}//TRANSLIT//IGNORE`
          );
          this._updateRawSrcAndLastKnownByteLength(
            lossyEncoder.convert(this.text)
          );
        }
        this.assetGraph.warn(err);
      }
    } else if (iconvLite) {
      try {
        this._updateRawSrcAndLastKnownByteLength(
          iconvLite.encode(this.text, this.encoding)
        );
      } catch (err) {
        err.message = `iconv: Converting ${this.url} from UTF-8 to ${this.encoding} failed:\n${err.message}`;
        if (!this.assetGraph) {
          throw err;
        }
        this.assetGraph.warn(err);
      }
    } else {
      const missingModuleError = new Error(
        `iconv and iconv-lite not found. Cannot encode ${this} as ${this.encoding}. ` +
          `Please run \`npm install iconv\` or \`npm install iconv-lite\` and try again`
      );
      missingModuleError.asset = this;
      throw missingModuleError;
    }

    return this._rawSrc;
  }

  set rawSrc(rawSrc) {
    this.unload();
    // Anything derived from the previous raw source is stale now.
    this._parseTree = undefined;
    this._text = undefined;
    this._updateRawSrcAndLastKnownByteLength(rawSrc);
    if (this.assetGraph) {
      this.populate();
    }
    this.markDirty();
  }

  /**
   * The decoded text contents of the asset as a JavaScript string.
   *
   * Unlike browsers, AssetGraph doesn't try to sniff the charset of
   * text-based assets. It assumes utf-8 unless the encoding/charset can be
   * determined from HTTP headers, `<meta http-equiv='Content-Type'>` tags
   * ({@link Html}) or `@charset` ({@link Css}), so provide those hints if
   * your text-based assets aren't all utf-8. Other asset types have no
   * standard way of declaring the charset inside the file, so presently the
   * only way to load e.g. JavaScript from disk that isn't utf-8 or ASCII is
   * to override `Text.prototype.defaultEncoding` globally.
   *
   * If the internal state has changed since the asset was initialized, the
   * asset is reserialized when `text` is retrieved, for example:
   *
   *     const htmlAsset = new Html({
   *         rawSrc: Buffer.from("<body>hello</body>")
   *     });
   *     htmlAsset.text; // "<body>hello</body>"
   *     htmlAsset.parseTree.body.innerHTML = "bye";
   *     htmlAsset.markDirty();
   *     htmlAsset.text; // "<body>bye</body>"
   *
   * Assigning a new text unloads the asset, repopulates it when it is part
   * of an AssetGraph, and marks it dirty. Setting this property after the
   * outgoing relations have been accessed currently leads to undefined
   * behavior.
   *
   * @type {String}
   */
  get text() {
    if (typeof this._text === 'string') {
      return this._text;
    }
    this._text = this._getTextFromRawSrc();
    return this._text;
  }

  set text(text) {
    this.unload();
    this._text = text;
    // The byte length depends on the encoded form, which is no longer known.
    this._lastKnownByteLength = undefined;
    if (this.assetGraph) {
      this.populate();
    }
    this.markDirty();
    // markDirty() discards _text when a parse tree exists, but the string that
    // was just assigned is still the authoritative one, so restore it.
    this._text = text;
  }

  /**
   * Mark the asset as dirty. When a parse tree is present, the cached text
   * and source map are discarded first, as they may no longer match it.
   */
  markDirty() {
    if (this._parseTree) {
      this._text = undefined;
      this._sourceMap = undefined;
    }
    super.markDirty();
  }

  /**
   * Decode the cached raw source into a string using the current encoding.
   *
   * @private
   * @return {String} The decoded text
   * @throws {Error} If the asset isn't loaded, has no cached raw source, or
   * needs a conversion module that isn't available.
   */
  _getTextFromRawSrc() {
    if (!this.isLoaded) {
      throw new Error(
        `Text._getTextFromRawSrc(): Asset not loaded: ${this.urlOrDescription}`
      );
    }
    if (!this._rawSrc) {
      throw new Error(
        `Text._getTextFromRawSrc(): No _rawSrc property found: ${this.urlOrDescription}`
      );
    }

    if (isUtf8Encoding(this.encoding)) {
      return this._rawSrc.toString('utf-8');
    }
    if (isAsciiEncoding(this.encoding)) {
      return this._rawSrc.toString('ascii');
    }
    if (iconv) {
      const decoder = new iconv.Iconv(this.encoding, 'utf-8');
      return decoder.convert(this._rawSrc).toString('utf-8');
    }
    if (iconvLite) {
      return iconvLite.decode(this._rawSrc, this.encoding);
    }

    const missingModuleError = new Error(
      `iconv and iconv-lite not found. Cannot decode ${this} (encoding is ${this.encoding}). ` +
        `Please run \`npm install iconv\` or \`npm install iconv-lite\` and try again`
    );
    missingModuleError.asset = this;

    throw missingModuleError;
  }
}

/**
 * Tell whether an encoding name denotes UTF-8 ("utf-8" or "utf8", in any
 * letter case), which is converted with Buffer alone.
 *
 * @param {String} encodingName The encoding name to check
 * @return {Boolean}
 */
function isUtf8Encoding(encodingName) {
  return /^utf-?8$/i.test(encodingName);
}

/**
 * Tell whether an encoding name denotes ASCII ("ascii", "us-ascii" or
 * "usascii", in any letter case), which is converted with Buffer alone.
 *
 * @param {String} encodingName The encoding name to check
 * @return {Boolean}
 */
function isAsciiEncoding(encodingName) {
  return /^(?:us-?)?ascii$/i.test(encodingName);
}

/**
 * Property that's true for all Text instances. Avoids reliance on
 * the `instanceof` operator.
 *
 * @member Text#isText
 * @type {Boolean}
 * @default true
 */
Text.prototype.isText = true;

/**
 * The default encoding for the Text (sub)class. Used for decoding
 * the raw source when the encoding cannot be determined by other
 * means, such as a `Content-Type` header (when the asset was
 * fetched via http), or another indicator specific to the given
 * asset type (`@charset` for Css, `<meta
 * http-equiv="Content-Type" ...>` for Html).
 *
 * Factory setting is "utf-8", but you can override it by setting
 * `Text.prototype.defaultEncoding` to another value supported by
 * the `iconv` or `iconv-lite` modules.
 *
 * @member Text#defaultEncoding
 * @type {String}
 * @default "utf-8"
 */
Text.prototype.defaultEncoding = 'utf-8';

// File extensions associated with plain Text assets.
Text.prototype.supportedExtensions = [
  '.txt',
  '.htaccess',
  '.md',
  '.rst',
  '.ics',
  '.csv',
  '.tsv',
  '.xtemplate', // For Ext.XTemplate + GETTEXT
];

// Content type associated with plain Text assets.
Text.prototype.contentType = 'text/plain';

// The Text class is this module's sole export.
module.exports = Text;